From 1888a3194a2fd0d48c33b7cf9753fec020a2059e Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Thu, 7 Jun 2018 22:59:54 -0500 Subject: [PATCH 1/7] split up pandas/tests/indexes/test_multi.py #18644 --- pandas/tests/indexes/multi/__init__.py | 0 pandas/tests/indexes/multi/conftest.py | 42 + .../{ => multi}/data/mindex_073.pickle | Bin .../{ => multi}/data/multiindex_v1.pickle | 0 pandas/tests/indexes/multi/test_base.py | 938 +++++ .../tests/indexes/multi/test_constructor.py | 478 +++ pandas/tests/indexes/multi/test_contains.py | 27 + pandas/tests/indexes/multi/test_conversion.py | 152 + pandas/tests/indexes/multi/test_copy.py | 38 + pandas/tests/indexes/multi/test_drop.py | 148 + pandas/tests/indexes/multi/test_format.py | 119 + pandas/tests/indexes/multi/test_get_set.py | 456 +++ pandas/tests/indexes/multi/test_indexing.py | 279 ++ pandas/tests/indexes/multi/test_integrity.py | 484 +++ pandas/tests/indexes/multi/test_join.py | 89 + pandas/tests/indexes/multi/test_monotonic.py | 159 + pandas/tests/indexes/multi/test_names.py | 117 + pandas/tests/indexes/multi/test_operations.py | 900 +++++ pandas/tests/indexes/multi/test_reindex.py | 88 + pandas/tests/indexes/multi/test_sorting.py | 56 + pandas/tests/indexes/test_multi.py | 3342 ----------------- 21 files changed, 4570 insertions(+), 3342 deletions(-) create mode 100644 pandas/tests/indexes/multi/__init__.py create mode 100644 pandas/tests/indexes/multi/conftest.py rename pandas/tests/indexes/{ => multi}/data/mindex_073.pickle (100%) rename pandas/tests/indexes/{ => multi}/data/multiindex_v1.pickle (100%) create mode 100644 pandas/tests/indexes/multi/test_base.py create mode 100644 pandas/tests/indexes/multi/test_constructor.py create mode 100644 pandas/tests/indexes/multi/test_contains.py create mode 100644 pandas/tests/indexes/multi/test_conversion.py create mode 100644 pandas/tests/indexes/multi/test_copy.py create mode 100644 pandas/tests/indexes/multi/test_drop.py create mode 100644 
pandas/tests/indexes/multi/test_format.py create mode 100644 pandas/tests/indexes/multi/test_get_set.py create mode 100644 pandas/tests/indexes/multi/test_indexing.py create mode 100644 pandas/tests/indexes/multi/test_integrity.py create mode 100644 pandas/tests/indexes/multi/test_join.py create mode 100644 pandas/tests/indexes/multi/test_monotonic.py create mode 100644 pandas/tests/indexes/multi/test_names.py create mode 100644 pandas/tests/indexes/multi/test_operations.py create mode 100644 pandas/tests/indexes/multi/test_reindex.py create mode 100644 pandas/tests/indexes/multi/test_sorting.py delete mode 100644 pandas/tests/indexes/test_multi.py diff --git a/pandas/tests/indexes/multi/__init__.py b/pandas/tests/indexes/multi/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py new file mode 100644 index 0000000000000..3bc8bf6a391d4 --- /dev/null +++ b/pandas/tests/indexes/multi/conftest.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest +from pandas import Index, MultiIndex + + +@pytest.fixture +def _index(): + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + index_names = ['first', 'second'] + idx = MultiIndex( + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=index_names, + verify_integrity=False + ) + return idx + + +@pytest.fixture +def named_index(_index): + return {'index': _index} + + +@pytest.fixture +def index_names(): + return ['first', 'second'] + + +@pytest.fixture +def _holder(): + return MultiIndex + + +@pytest.fixture +def _compat_props(): + return ['shape', 'ndim', 'size'] diff --git a/pandas/tests/indexes/data/mindex_073.pickle b/pandas/tests/indexes/multi/data/mindex_073.pickle similarity index 100% rename from 
pandas/tests/indexes/data/mindex_073.pickle rename to pandas/tests/indexes/multi/data/mindex_073.pickle diff --git a/pandas/tests/indexes/data/multiindex_v1.pickle b/pandas/tests/indexes/multi/data/multiindex_v1.pickle similarity index 100% rename from pandas/tests/indexes/data/multiindex_v1.pickle rename to pandas/tests/indexes/multi/data/multiindex_v1.pickle diff --git a/pandas/tests/indexes/multi/test_base.py b/pandas/tests/indexes/multi/test_base.py new file mode 100644 index 0000000000000..de08c366e7bb9 --- /dev/null +++ b/pandas/tests/indexes/multi/test_base.py @@ -0,0 +1,938 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) +from pandas._libs.tslib import iNaT +from pandas.compat import PY3 +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +def verify_pickle(indices): + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + + +def test_pickle_compat_construction(_holder): + # this is testing for pickle compat + if _holder is None: + return + + # need an object to create with + pytest.raises(TypeError, _holder) + + +def test_to_series(_index): + # assert that we are creating a copy of the index + + idx = _index + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + + +def test_to_series_with_arguments(_index): + # GH18699 + + # index kwarg + idx = _index + s = idx.to_series(index=idx) + + assert s.values is not idx.values + assert s.index is idx + assert s.name == idx.name + + # name kwarg + idx = _index + s = idx.to_series(name='__test') + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name + + 
def test_shift(_index):
    """``shift`` must raise ``NotImplementedError`` for the index fixture."""

    # GH8083 test the base class for shift
    idx = _index
    pytest.raises(NotImplementedError, idx.shift, 1)
    pytest.raises(NotImplementedError, idx.shift, 1, 2)


def test_create_index_existing_name(_index):
    """``pd.Index(existing)`` keeps the names unless new ones are passed."""

    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    expected = _index
    if not isinstance(expected, MultiIndex):
        expected.name = 'foo'
        result = pd.Index(expected)
        tm.assert_index_equal(result, expected)

        result = pd.Index(expected, name='bar')
        expected.name = 'bar'
        tm.assert_index_equal(result, expected)
    else:
        expected.names = ['foo', 'bar']
        result = pd.Index(expected)
        tm.assert_index_equal(
            result, Index(Index([('foo', 'one'), ('foo', 'two'),
                                 ('bar', 'one'), ('baz', 'two'),
                                 ('qux', 'one'), ('qux', 'two')],
                                dtype='object'),
                          names=['foo', 'bar']))

        result = pd.Index(expected, names=['A', 'B'])
        tm.assert_index_equal(
            result,
            Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
                         ('baz', 'two'), ('qux', 'one'), ('qux', 'two')],
                        dtype='object'), names=['A', 'B']))


def test_numeric_compat(_index):
    """Arithmetic operators on the index fixture raise ``TypeError``."""

    idx = _index
    tm.assert_raises_regex(TypeError, "cannot perform __mul__",
                           lambda: idx * 1)
    tm.assert_raises_regex(TypeError, "cannot perform __rmul__",
                           lambda: 1 * idx)

    # the division dunder is named differently on Python 2 and Python 3
    div_err = "cannot perform __truediv__" if PY3 \
        else "cannot perform __div__"
    tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1)
    # derive the reflected-operator message from the forward one
    div_err = div_err.replace(' __', ' __r')
    tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx)
    tm.assert_raises_regex(TypeError, "cannot perform __floordiv__",
                           lambda: idx // 1)
    tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__",
                           lambda: 1 // idx)


def test_logical_compat(_index):
    """``all`` and ``any`` on the index fixture raise ``TypeError``."""
    idx = _index
    tm.assert_raises_regex(TypeError, 'cannot perform all',
                           lambda: idx.all())
    tm.assert_raises_regex(TypeError, 'cannot perform any',
                           lambda: idx.any())


def test_boolean_context_compat(_index):
    """Using the index in a boolean context raises ``ValueError``."""

    # boolean context compat
    idx = _index

    def f():
        if idx:
            pass

    tm.assert_raises_regex(ValueError, 'The truth value of a', f)


def test_reindex_base(_index):
    """``get_indexer`` on itself is the identity; bad methods raise."""
    idx = _index
    expected = np.arange(idx.size, dtype=np.intp)

    actual = idx.get_indexer(idx)
    tm.assert_numpy_array_equal(expected, actual)

    with tm.assert_raises_regex(ValueError, 'Invalid fill method'):
        idx.get_indexer(idx, method='invalid')


def test_get_indexer_consistency(named_index):
    """``get_indexer`` / ``get_indexer_non_unique`` return intp ndarrays."""
    # See GH 16819
    for name, index in named_index.items():
        if isinstance(index, IntervalIndex):
            continue

        if index.is_unique or isinstance(index, CategoricalIndex):
            indexer = index.get_indexer(index[0:2])
            assert isinstance(indexer, np.ndarray)
            assert indexer.dtype == np.intp
        else:
            e = "Reindexing only valid with uniquely valued Index objects"
            with tm.assert_raises_regex(InvalidIndexError, e):
                indexer = index.get_indexer(index[0:2])

        indexer, _ = index.get_indexer_non_unique(index[0:2])
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp


def test_ndarray_compat_properties(_index, _compat_props):
    """ndarray-style properties agree between the index and its values."""
    idx = _index
    assert idx.T.equals(idx)
    assert idx.transpose().equals(idx)

    values = idx.values
    for prop in _compat_props:
        assert getattr(idx, prop) == getattr(values, prop)

    # test for validity
    idx.nbytes
    idx.values.nbytes


def test_dtype_str(indices):
    """``dtype_str`` is a string equal to ``str(dtype)``."""
    dtype = indices.dtype_str
    assert isinstance(dtype, compat.string_types)
    assert dtype == str(indices.dtype)


def test_repr_max_seq_item_setting(_index):
    """With ``display.max_seq_items=None`` the repr is not truncated."""
    # GH10182
    idx = _index
    idx = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(idx)
        assert '...' not in str(idx)


def test_wrong_number_names(indices):
    """Assigning three names to ``names`` must raise ``ValueError``."""
    def testit(ind):
        ind.names = ["apple", "banana", "carrot"]
    tm.assert_raises_regex(ValueError, "^Length", testit, indices)


def test_hash_error(indices):
    """Hashing an index raises ``TypeError`` naming the index class."""
    index = indices
    tm.assert_raises_regex(TypeError, "unhashable type: %r" %
                           type(index).__name__, hash, indices)


def test_copy_name(named_index):
    """The ``name`` passed at construction is honoured (MultiIndex skipped).
    """
    # gh-12309: Check that the "name" argument
    # passed at initialization is honored.

    for name, index in compat.iteritems(named_index):
        if isinstance(index, MultiIndex):
            continue

        first = index.__class__(index, copy=True, name='mario')
        second = first.__class__(first, copy=False)

        # Even though "copy=False", we want a new object.
        assert first is not second

        # Not using tm.assert_index_equal() since names differ.
        assert index.equals(first)

        assert first.name == 'mario'
        assert second.name == 'mario'

        s1 = Series(2, index=first)
        s2 = Series(3, index=second[:-1])

        if not isinstance(index, CategoricalIndex):
            # See gh-13365
            s3 = s1 * s2
            assert s3.index.name == 'mario'


def test_ensure_copied_data(named_index):
    """The ``copy`` argument of the index constructors is honoured."""
    # Check the "copy" argument of each Index.__new__ is honoured
    # GH12309
    for name, index in compat.iteritems(named_index):
        init_kwargs = {}
        if isinstance(index, PeriodIndex):
            # Needs "freq" specification:
            init_kwargs['freq'] = index.freq
        elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
            # RangeIndex cannot be initialized from data
            # MultiIndex and CategoricalIndex are tested separately
            continue

        index_type = index.__class__
        result = index_type(index.values, copy=True, **init_kwargs)
        tm.assert_index_equal(index, result)
        tm.assert_numpy_array_equal(index.values, result.values,
                                    check_same='copy')

        if isinstance(index, PeriodIndex):
            # .values an object array of Period, thus copied
            result = index_type(ordinal=index.asi8, copy=False,
                                **init_kwargs)
            tm.assert_numpy_array_equal(index._ndarray_values,
                                        result._ndarray_values,
                                        check_same='same')
        elif isinstance(index, IntervalIndex):
            # checked in test_interval.py
            pass
        else:
            result = index_type(index.values, copy=False, **init_kwargs)
            tm.assert_numpy_array_equal(index.values, result.values,
                                        check_same='same')
            tm.assert_numpy_array_equal(index._ndarray_values,
                                        result._ndarray_values,
                                        check_same='same')


def test_copy_and_deepcopy(indices):
    """``copy``/``deepcopy`` give equal, distinct objects (no MultiIndex).
    """
    from copy import copy, deepcopy

    if isinstance(indices, MultiIndex):
        return
    for func in (copy, deepcopy):
        idx_copy = func(indices)
        assert idx_copy is not indices
        assert idx_copy.equals(indices)

    new_copy = indices.copy(deep=True, name="banana")
    assert new_copy.name == "banana"


def test_unique_na():
    """``unique`` keeps a single NaN and preserves the index name."""
    idx = pd.Index([2, np.nan, 2, 1], name='my_index')
    expected = pd.Index([2, np.nan, 1], name='my_index')
    result = idx.unique()
    tm.assert_index_equal(result, expected)


def test_sort(indices):
    """Calling ``sort`` on an index raises ``TypeError``."""
    pytest.raises(TypeError, indices.sort)


def test_mutability(indices):
    """Item assignment on a non-empty index raises ``TypeError``."""
    if not len(indices):
        return
    pytest.raises(TypeError, indices.__setitem__, 0, indices[0])


def test_compat(indices):
    """``tolist()`` matches ``list()`` on the index."""
    assert indices.tolist() == list(indices)


def test_memory_usage(named_index):
    """``memory_usage`` grows after a lookup and with ``deep=True``."""
    for name, index in compat.iteritems(named_index):
        result = index.memory_usage()
        if len(index):
            index.get_loc(index[0])
            result2 = index.memory_usage()
            result3 = index.memory_usage(deep=True)

            # RangeIndex, IntervalIndex
            # don't have engines
            if not isinstance(index, (RangeIndex, IntervalIndex)):
                assert result2 > result

            if index.inferred_type == 'object':
                assert result3 > result2

        else:

            # we report 0 for no-length
            assert result == 0


def test_numpy_argsort(named_index):
    """``np.argsort(index)`` matches ``index.argsort()``.

    For ``CategoricalIndex`` and ``RangeIndex`` the numpy-only keyword
    arguments must additionally be rejected with ``ValueError``.
    """
    for k, ind in named_index.items():
        result = np.argsort(ind)
        expected = ind.argsort()
        tm.assert_numpy_array_equal(result, expected)

        # these are the only two types that perform
        # pandas compatibility input validation - the
        # rest already perform separate (or no) such
        # validation via their 'values' attribute as
        # defined in pandas.core.indexes/base.py - they
        # cannot be changed at the moment due to
        # backwards compatibility concerns
        #
        # NOTE: check the instance itself; ``isinstance(type(ind), ...)``
        # compares a class object against index classes and is always
        # False, which silently skipped the assertions below.
        if isinstance(ind, (CategoricalIndex, RangeIndex)):
            msg = "the 'axis' parameter is not supported"
            tm.assert_raises_regex(ValueError, msg,
                                   np.argsort, ind, axis=1)

            msg = "the 'kind' parameter is not supported"
            tm.assert_raises_regex(ValueError, msg, np.argsort,
                                   ind, kind='mergesort')

            msg = "the 'order' parameter is not supported"
            tm.assert_raises_regex(ValueError, msg, np.argsort,
                                   ind, order=('a', 'b'))


def test_pickle(indices):
    """The index round-trips through pickle, with and without a new name.
    """
    verify_pickle(indices)
    original_name, indices.name = indices.name, 'foo'
    try:
        verify_pickle(indices)
    finally:
        # always restore the name so a failure here cannot leak 'foo'
        # into whatever else uses the same fixture object
        indices.name = original_name


def test_take(named_index):
    """``take`` agrees with positional ``__getitem__``."""
    indexer = [4, 3, 0, 2]
    for k, ind in named_index.items():

        # separate
        if k in ['boolIndex', 'tuples', 'empty']:
            continue

        result = ind.take(indexer)
        expected = ind[indexer]
        assert result.equals(expected)

        if not isinstance(ind,
                          (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
            # GH 10791
            with pytest.raises(AttributeError):
                ind.freq


def test_take_invalid_kwargs(_index):
    """``take`` rejects unknown and numpy-only keyword arguments."""
    idx = _index
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    tm.assert_raises_regex(TypeError, msg, idx.take,
                           indices, foo=2)

    msg = "the 'out' parameter is not supported"
    tm.assert_raises_regex(ValueError, msg, idx.take,
                           indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    tm.assert_raises_regex(ValueError, msg, idx.take,
                           indices, mode='clip')


def test_setops_errorcases(named_index):
    """Set operations raise ``TypeError`` for non-array-like input."""
    for name, idx in compat.iteritems(named_index):
        # non-iterable input
        cases = [0.5, 'xxx']
        methods = [idx.intersection, idx.union, idx.difference,
                   idx.symmetric_difference]

        for method in methods:
            for case in cases:
                tm.assert_raises_regex(TypeError,
                                       "Input must be Index "
                                       "or array-like",
                                       method, case)


def test_intersection_base(named_index):
    """``intersection`` with an index, ndarray, Series or list."""
    for name, idx in compat.iteritems(named_index):
        first = idx[:5]
        second = idx[:3]
        intersect = first.intersection(second)

        if isinstance(idx, CategoricalIndex):
            pass
        else:
            assert tm.equalContents(intersect, second)

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assert_raises_regex(ValueError, msg):
                    result = first.intersection(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            else:
                result = first.intersection(case)
                assert tm.equalContents(result, second)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assert_raises_regex(TypeError, msg):
                result = first.intersection([1, 2, 3])


def test_union_base(named_index):
    """``union`` with an index, ndarray, Series or list."""
    for name, idx in compat.iteritems(named_index):
        first = idx[3:]
        second = idx[:5]
        everything = idx
        union = first.union(second)
        assert tm.equalContents(union, everything)

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assert_raises_regex(ValueError, msg):
                    result = first.union(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            else:
                result = first.union(case)
                assert tm.equalContents(result, everything)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assert_raises_regex(TypeError, msg):
                result = first.union([1, 2, 3])


def test_difference_base(named_index):
    """``difference`` with an index, ndarray, Series or list."""
    for name, idx in compat.iteritems(named_index):
        first = idx[2:]
        second = idx[:4]
        answer = idx[4:]
        result = first.difference(second)

        if isinstance(idx, CategoricalIndex):
            pass
        else:
            assert tm.equalContents(result, answer)

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assert_raises_regex(ValueError, msg):
                    result = first.difference(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
                # recompute for this ``case``; previously the assertions
                # ran against the stale ``result`` from before the loop,
                # so ``case`` was never exercised in this branch
                result = first.difference(case)
                assert result.__class__ == answer.__class__
                tm.assert_numpy_array_equal(result.sort_values().asi8,
                                            answer.sort_values().asi8)
            else:
                result = first.difference(case)
                assert tm.equalContents(result, answer)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assert_raises_regex(TypeError, msg):
                result = first.difference([1, 2, 3])


def test_symmetric_difference(named_index):
    """``symmetric_difference`` with an index, ndarray, Series or list."""
    for name, idx in compat.iteritems(named_index):
        first = idx[1:]
        second = idx[:-1]
        if isinstance(idx, CategoricalIndex):
            pass
        else:
            answer = idx[[0, -1]]
            result = first.symmetric_difference(second)
            assert tm.equalContents(result, answer)

        # GH 10149
        cases = [klass(second.values)
                 for klass in [np.array, Series, list]]
        for case in cases:
            if isinstance(idx, PeriodIndex):
                msg = "can only call with other PeriodIndex-ed objects"
                with tm.assert_raises_regex(ValueError, msg):
                    result = first.symmetric_difference(case)
            elif isinstance(idx, CategoricalIndex):
                pass
            else:
                result = first.symmetric_difference(case)
                assert tm.equalContents(result, answer)

        if isinstance(idx, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with tm.assert_raises_regex(TypeError, msg):
                first.symmetric_difference([1, 2, 3])


def test_insert_base(named_index):
    """Inserting the first element at position 0 restores the slice."""

    for name, idx in compat.iteritems(named_index):
        result = idx[1:4]

        if not len(idx):
            continue

        # test 0th element
        assert idx[0:4].equals(result.insert(0, idx[0]))
+ + +def test_delete_base(named_index): + + for name, idx in compat.iteritems(named_index): + + if not len(idx): + continue + + if isinstance(idx, RangeIndex): + # tested in class + continue + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + +def test_equals(named_index): + + for name, idx in compat.iteritems(named_index): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(idx, RangeIndex): + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(pd.Series(idx)) + + +def test_equals_op(_index): + # GH9947, GH10637 + index_a = _index + if isinstance(index_a, PeriodIndex): + return + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + 
tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with tm.assert_raises_regex(ValueError, msg): + series_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_numpy_ufuncs(named_index): + # test ufuncs of numpy 1.9.2. 
see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + # some functions are skipped because it may return different result + # for unicode input depending on numpy version + + for name, idx in compat.iteritems(named_index): + for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + # PeriodIndex behavior should be changed in future version + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # coerces to float (e.g. np.sin) + with np.errstate(all='ignore'): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + + tm.assert_index_equal(result, exp) + assert isinstance(result, pd.Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + + for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + func(idx) + + +def test_hasnans_isnans(named_index): + # GH 11343, added tests for hasnans / isnans + for name, index in named_index.items(): + if isinstance(index, MultiIndex): + pass + else: + idx = index.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(idx), dtype=bool) + tm.assert_numpy_array_equal(idx._isnan, expected) + 
assert not idx.hasnans + + idx = index.copy() + values = idx.values + + if len(index) == 0: + continue + elif isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans + + +def test_fillna(named_index): + # GH 11343 + for name, index in named_index.items(): + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with tm.assert_raises_regex(TypeError, msg): + idx.fillna([idx[0]]) + + idx = index.copy() + values = idx.values + + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans + + +def test_nulls(named_index): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + + for name, index in named_index.items(): + if len(index) == 0: + tm.assert_numpy_array_equal( + index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with 
tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() + else: + + if not index.hasnans: + tm.assert_numpy_array_equal( + index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + +def test_empty(_index): + # GH 15270 + index = _index + assert not index.empty + assert index[:0].empty + + +def test_join_self_unique(_index, join_type): + index = _index + if index.is_unique: + joined = index.join(index, how=join_type) + assert (index == joined).all() + + +def test_searchsorted_monotonic(indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return + + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side='right') + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + 
+ else: + # non-monotonic should raise. + with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side='left') + + +def test_map(_index): + # callable + index = _index + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) +def test_map_dictlike(_index, mapper): + + index = _index + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) + + identity = mapper(index.values, index) + + # we don't infer to UInt64 for a dict + if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): + expected = index.astype('int64') + else: + expected = index + + result = index.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + +def test_putmask_with_wrong_mask(_index): + # GH18368 + index = _index + + with pytest.raises(ValueError): + index.putmask(np.ones(len(index) + 1, np.bool), 1) + + with pytest.raises(ValueError): + index.putmask(np.ones(len(index) - 1, np.bool), 1) + + with pytest.raises(ValueError): + index.putmask('foo', 1) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py new file mode 100644 index 0000000000000..f4c0552578680 --- /dev/null +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -0,0 +1,478 @@ +# -*- coding: utf-8 -*- + +import re + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import DataFrame, Index, MultiIndex, date_range +from pandas._libs.tslib import Timestamp +from 
pandas.compat import lrange, range +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +def test_constructor_single_level(): + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] + + +def test_constructor_no_levels(): + tm.assert_raises_regex(ValueError, "non-zero number " + "of levels/labels", + MultiIndex, levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') + with tm.assert_raises_regex(TypeError, both_re): + MultiIndex(levels=[]) + with tm.assert_raises_regex(TypeError, both_re): + MultiIndex(labels=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], [u'one', u'two']] + labels = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = ((['foo'], ['bar'])) + message = "MultiIndex.name must be a hashable type" + tm.assert_raises_regex(TypeError, message, + MultiIndex, levels=levels, + labels=labels, names=names) + + # With .rename() + mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=('foo', 'bar')) + renamed = [['foor'], ['barr']] + tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) + # With .set_names() + tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) + + +def test_constructor_mismatched_label_levels(_index): + labels = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + tm.assert_raises_regex(ValueError, "Length of levels and labels " + "must be the same", MultiIndex, + levels=levels, labels=labels) + length_error = re.compile('>= length of level') + label_error = re.compile(r'Unequal label lengths: \[4, 2\]') + + # important to check that it's looking at the right thing. 
+ with tm.assert_raises_regex(ValueError, length_error): + MultiIndex(levels=[['a'], ['b']], + labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with tm.assert_raises_regex(ValueError, label_error): + MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) + + # external API + with tm.assert_raises_regex(ValueError, length_error): + _index.copy().set_levels([['a'], ['b']]) + + with tm.assert_raises_regex(ValueError, label_error): + _index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + labels = np.array([1, 1, 2, 0, 0, 1, 1]) + val = labels[0] + mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + copy=True) + assert mi.labels[0][0] == val + labels[0] = 15 + assert mi.labels[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not 
recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame([['deleteMe', 1, 9], + ['keepMe', 2, 9], + ['keepMeToo', 3, 9]], + columns=['first', 'second', 'third']) + df2 = df.set_index(['first', 'second'], drop=False) + df2 = df2[df2['first'] != 'deleteMe'] + + # removed levels are there + expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], + [1, 2, 3]], + labels=[[1, 2], [1, 2]], + names=['first', 'second']) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], + [2, 3]], + labels=[[0, 1], [0, 1]], + names=['first', 'second']) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +def test_from_arrays(_index): + arrays = [] + for lev, lab in zip(_index.levels, _index.labels): + arrays.append(np.asarray(lev).take(lab)) + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=_index.names) + tm.assert_index_equal(result, _index) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], + ['a', 'b']]) + assert result.levels[0].equals(Index([Timestamp('20130101')])) + assert result.levels[1].equals(Index(['a', 'b'])) + + +def test_from_arrays_iterator(_index): + # GH 18434 + arrays = [] + for lev, lab in zip(_index.levels, _index.labels): + arrays.append(np.asarray(lev).take(lab)) + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=_index.names) + tm.assert_index_equal(result, _index) + + # invalid iterator input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of array-likes."): + MultiIndex.from_arrays(0) + + +def test_from_arrays_index_series_datetimetz(): + idx1 = pd.date_range('2015-01-01 
10:00', freq='D', periods=3, + tz='US/Eastern') + idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3, + tz='Asia/Tokyo') + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_timedelta(): + idx1 = pd.timedelta_range('1 days', freq='D', periods=3) + idx2 = pd.timedelta_range('2 hours', freq='H', periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_period(): + idx1 = pd.period_range('2011-01-01', freq='D', periods=3) + idx2 = pd.period_range('2015-01-01', freq='H', periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, + tz='US/Eastern') + idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3) + idx3 = pd.timedelta_range('1 days', freq='D', periods=3) + idx4 = pd.period_range('2011-01-01', freq='D', 
periods=3) + + result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), + pd.Series(idx2), + pd.Series(idx3), + pd.Series(idx4)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=True) + + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels + with tm.assert_raises_regex( + ValueError, "Must pass non-zero number of levels/labels"): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) + expected = Index([], name='A') + tm.assert_index_equal(result.levels[0], expected) + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list('ABC')[:N] + result = MultiIndex.from_arrays(arrays=arrays, 
names=names) + expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, + names=names) + tm.assert_index_equal(result, expected) + + +def test_from_arrays_invalid_input(): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + for i in invalid_inputs: + pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) + + +def test_from_arrays_different_lengths(): + # see gh-13599 + idx1 = [1, 2, 3] + idx2 = ['a', 'b'] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + idx1 = [] + idx2 = ['a', 'b'] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + idx1 = [1, 2, 3] + idx2 = [] + tm.assert_raises_regex(ValueError, '^all arrays must ' + 'be same length$', + MultiIndex.from_arrays, [idx1, idx2]) + + +def test_from_tuples(): + tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' + 'from empty list', + MultiIndex.from_tuples, []) + + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex(levels=[[1, 3], [2, 4]], + labels=[[0, 1], [0, 1]], + names=['a', 'b']) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) + tm.assert_index_equal(result, expected) + + # input non-iterables + with tm.assert_raises_regex( + TypeError, 'Input must be a list / sequence of tuple-likes.'): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=['a', 'b']) + expected = MultiIndex.from_arrays(arrays=[[], []], + names=['a', 'b']) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(_index): + result = MultiIndex.from_tuples(_index) 
+ assert (result.values == _index.values).all() + + +def test_from_product_empty(): + # 0 levels + with tm.assert_raises_regex( + ValueError, "Must pass non-zero number of levels/labels"): + MultiIndex.from_product([]) + + # 1 level + result = MultiIndex.from_product([[]], names=['A']) + expected = pd.Index([], name='A') + tm.assert_index_equal(result.levels[0], expected) + + # 2 levels + l1 = [[], ['foo', 'bar', 'baz'], []] + l2 = [[], [], ['a', 'b', 'c']] + names = ['A', 'B'] + for first, second in zip(l1, l2): + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], + labels=[[], []], names=names) + tm.assert_index_equal(result, expected) + + # GH12258 + names = ['A', 'B', 'C'] + for N in range(4): + lvl2 = lrange(N) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], + labels=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +def test_from_product_invalid_input(): + invalid_inputs = [1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b']] + for i in invalid_inputs: + pytest.raises(TypeError, MultiIndex.from_product, iterables=i) + + +def test_from_product_datetimeindex(): + dt_index = date_range('2000-01-01', periods=2) + mi = pd.MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( + '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( + '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) + tm.assert_numpy_array_equal(mi.values, etalon) + + +def test_from_product_index_series_categorical(): + # GH13743 + first = ['foo', 'bar'] + for ordered in [False, True]: + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), + ordered=ordered) + expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), + categories=list("bac"), + ordered=ordered) + + for arr in [idx, pd.Series(idx), idx.values]: + result = 
pd.MultiIndex.from_product([first, arr]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + with tm.assert_raises_regex( + TypeError, "Input must be a list / sequence of iterables."): + MultiIndex.from_product(0) diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py new file mode 100644 index 0000000000000..156ec47a09a38 --- /dev/null +++ b/pandas/tests/indexes/multi/test_contains.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +import pandas as pd +from pandas import MultiIndex + + +def test_contains_top_level(): + midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) + assert 'A' in midx + assert 'A' not in midx._engine + + +def test_contains_with_nat(): + # MI with a NaT + mi = MultiIndex(levels=[['C'], + pd.date_range('2012-01-01', periods=5)], + labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, 'B']) + assert ('C', pd.Timestamp('2012-01-01')) in mi + for val in mi.values: + assert val in mi + + +def test_contains(_index): + assert ('foo', 'two') in _index + assert 
('bar', 'two') not in _index + assert None not in _index diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py new file mode 100644 index 0000000000000..dbf575129e7c2 --- /dev/null +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import DataFrame, MultiIndex, date_range +from pandas.compat import PY3, range + +from pandas.util.testing import assert_almost_equal + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + with pytest.raises(ValueError): + pd.Index(li, name='abc') + with pytest.raises(ValueError): + pd.Index(li, name='a') + + +def test_tolist(_index): + result = _index.tolist() + exp = list(_index.values) + assert result == exp + + +def test_to_frame(): + tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] + + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] + index = MultiIndex.from_tuples(tuples, names=['first', 'second']) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([range(5), + pd.date_range('20130101', periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + {0: np.repeat(np.arange(5, dtype='int64'), 3), + 1: np.tile(pd.date_range('20130101', periods=3), 5)}) + tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([range(5), + pd.date_range('20130101', 
periods=3)]) + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_hierarchical(): + index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two')]) + result = index.to_hierarchical(3) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + # K > 1 + result = index.to_hierarchical(3, 2) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + # non-sorted + index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), + (2, 'a'), (2, 'b')], + names=['N1', 'N2']) + + result = index.to_hierarchical(2) + expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), + (1, 'b'), + (2, 'a'), (2, 'a'), + (2, 'b'), (2, 'b')], + names=['N1', 'N2']) + tm.assert_index_equal(result, expected) + assert result.names == index.names + + +def test_legacy_pickle(): + if PY3: + pytest.skip("testing for legacy pickles not " + "support on py3") + + path = tm.get_data_path('multiindex_v1.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + assert obj.equals(obj2) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj), dtype=np.intp) + assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + +def test_legacy_v2_unpickle(): + + # 0.7.3 -> 0.8.0 format manage + path = tm.get_data_path('mindex_073.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + assert obj.equals(obj2) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj), dtype=np.intp) + 
assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + +def test_roundtrip_pickle_with_tz(): + + # GH 8367 + # round-trip of timezone + index = MultiIndex.from_product( + [[1, 2], ['a', 'b'], date_range('20130101', periods=3, + tz='US/Eastern') + ], names=['one', 'two', 'three']) + unpickled = tm.round_trip_pickle(index) + assert index.equal_levels(unpickled) diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py new file mode 100644 index 0000000000000..097adc25da86c --- /dev/null +++ b/pandas/tests/indexes/multi/test_copy.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + + +import pandas.util.testing as tm + + +def assert_multiindex_copied(copy, original): + # Levels should be (at least, shallow copied) + tm.assert_copy(copy.levels, original.levels) + tm.assert_almost_equal(copy.labels, original.labels) + + # Labels doesn't matter which way copied + tm.assert_almost_equal(copy.labels, original.labels) + assert copy.labels is not original.labels + + # Names doesn't matter which way copied + assert copy.names == original.names + assert copy.names is not original.names + + # Sort order should be copied + assert copy.sortorder == original.sortorder + + +def test_copy(_index): + i_copy = _index.copy() + + assert_multiindex_copied(i_copy, _index) + + +def test_shallow_copy(_index): + i_copy = _index._shallow_copy() + + assert_multiindex_copied(i_copy, _index) + + +def test_view(_index): + i_view = _index.view() + assert_multiindex_copied(i_view, _index) diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py new file mode 100644 index 0000000000000..2e4d66d9ef993 --- /dev/null +++ b/pandas/tests/indexes/multi/test_drop.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm 
+import pytest +from pandas import Index, MultiIndex +from pandas.compat import lrange +from pandas.errors import PerformanceWarning + + +def test_drop(_index): + dropped = _index.drop([('foo', 'two'), ('qux', 'one')]) + + index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) + dropped2 = _index.drop(index) + + expected = _index[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = _index.drop(['bar']) + expected = _index[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = _index.drop('foo') + expected = _index[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([('bar', 'two')]) + pytest.raises(KeyError, _index.drop, [('bar', 'two')]) + pytest.raises(KeyError, _index.drop, index) + pytest.raises(KeyError, _index.drop, ['foo', 'two']) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) + pytest.raises(KeyError, _index.drop, mixed_index) + + # error='ignore' + dropped = _index.drop(index, errors='ignore') + expected = _index[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = _index.drop(mixed_index, errors='ignore') + expected = _index[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = _index.drop(['foo', 'two'], errors='ignore') + expected = _index[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = _index.drop(['foo', ('qux', 'one')]) + expected = _index[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ['foo', ('qux', 'one'), 'two'] + pytest.raises(KeyError, _index.drop, mixed_index) + dropped = _index.drop(mixed_index, errors='ignore') + expected = _index[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(_index): + index = _index[_index.get_loc('foo')] + dropped = 
index.droplevel(0) + assert dropped.name == 'second' + + index = MultiIndex( + levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], + labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + dropped = index.droplevel(0) + assert dropped.names == ('two', 'three') + + dropped = index.droplevel('two') + expected = index.droplevel(1) + assert dropped.equals(expected) + + +def test_droplevel_list(): + index = MultiIndex( + levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], + labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + + dropped = index[:2].droplevel(['three', 'one']) + expected = index[:2].droplevel(2).droplevel(0) + assert dropped.equals(expected) + + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + with pytest.raises(ValueError): + index[:2].droplevel(['one', 'two', 'three']) + + with pytest.raises(KeyError): + index[:2].droplevel(['one', 'four']) + + +def test_drop_not_lexsorted(): + # GH 12078 + + # define the lexsorted version of the multi-index + tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] + lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) + assert lexsorted_mi.is_lexsorted() + + # and the not-lexsorted version + df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], + data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]]) + df = df.pivot_table(index='a', columns=['b', 'c'], values='d') + df = df.reset_index() + not_lexsorted_mi = df.columns + assert not not_lexsorted_mi.is_lexsorted() + + # compare the results + tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_index_equal(lexsorted_mi.drop('a'), + not_lexsorted_mi.drop('a')) + + +def test_dropna(): + # GH 6194 + idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, 
np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ['a', 'b', 'c', np.nan, 'e']]) + + exp = pd.MultiIndex.from_arrays([[1, 5], + [1, 5], + ['a', 'e']]) + tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how='any'), exp) + + exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], + [1, 2, np.nan, 5], + ['a', 'b', 'c', 'e']]) + tm.assert_index_equal(idx.dropna(how='all'), exp) + + msg = "invalid how option: xxx" + with tm.assert_raises_regex(ValueError, msg): + idx.dropna(how='xxx') diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py new file mode 100644 index 0000000000000..97fc343f01a5f --- /dev/null +++ b/pandas/tests/indexes/multi/test_format.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- + + +import warnings + +import pandas as pd +import pandas.util.testing as tm +from pandas import MultiIndex, compat +from pandas.compat import PY3, range, u + + +def test_format(_index): + _index.format() + _index[:0].format() + + +def test_format_integer_names(): + index = MultiIndex(levels=[[0, 1], [0, 1]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) + index.format(names=True) + + +def test_format_sparse_config(_index): + warn_filters = warnings.filters + warnings.filterwarnings('ignore', category=FutureWarning, + module=".*format") + # GH1538 + pd.set_option('display.multi_sparse', False) + + result = _index.format() + assert result[1] == 'foo two' + + tm.reset_display_options() + + warnings.filters = warn_filters + + +def test_format_sparse_display(): + index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], + labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) + + result = index.format() + assert result[3] == '1 0 0 0' + + +def test_repr_with_unicode_data(): + with pd.core.config.option_context("display.encoding", 'UTF-8'): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\u" not in 
repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip(): + + mi = MultiIndex.from_product([list('ab'), range(3)], + names=['first', 'second']) + str(mi) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' + + mi_u = MultiIndex.from_product( + [list(u'ab'), range(3)], names=['first', 'second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + # formatting + if PY3: + str(mi) + else: + compat.text_type(mi) + + # long format + mi = MultiIndex.from_product([list('abcdefg'), range(10)], + names=['first', 'second']) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + assert mi.get_level_values('first').inferred_type == 'string' + assert result.get_level_values('first').inferred_type == 'unicode' + + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + +def test_str(): + # tested elsewhere + pass + + +def test_unicode_string_with_unicode(): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + str(idx) + else: + compat.text_type(idx) + + +def test_bytestring_with_unicode(): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + bytes(idx) + else: + str(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py new file mode 100644 index 0000000000000..faeaf35592de4 --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -0,0 +1,456 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd 
+import pandas.util.testing as tm +import pytest +from pandas import CategoricalIndex, Index, MultiIndex +from pandas.compat import range + + +def test_set_name_methods(_index, index_names): + # so long as these are synonyms, we don't need to test set_names + assert _index.rename == _index.set_names + new_names = [name + "SUFFIX" for name in index_names] + ind = _index.set_names(new_names) + assert _index.names == index_names + assert ind.names == new_names + with tm.assert_raises_regex(ValueError, "^Length"): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + assert res is None + assert ind.names == new_names2 + + # set names for specific level (# GH7792) + ind = _index.set_names(new_names[0], level=0) + assert _index.names == index_names + assert ind.names == [new_names[0], index_names[1]] + + res = ind.set_names(new_names2[0], level=0, inplace=True) + assert res is None + assert ind.names == [new_names2[0], index_names[1]] + + # set names for multiple levels + ind = _index.set_names(new_names, level=[0, 1]) + assert _index.names == index_names + assert ind.names == new_names + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + assert res is None + assert ind.names == new_names2 + + +def test_set_levels_labels_directly(_index): + # setting levels/labels directly raises AttributeError + + levels = _index.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + labels = _index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + with pytest.raises(AttributeError): + _index.levels = new_levels + + with pytest.raises(AttributeError): + _index.labels = new_labels + + +def test_set_levels(_index): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is 
possible. + levels = _index.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + # level changing [w/o mutation] + ind2 = _index.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(_index.levels, levels) + + # level changing [w/ mutation] + ind2 = _index.copy() + inplace_return = ind2.set_levels(new_levels, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = _index.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(_index.levels, levels) + + ind2 = _index.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(_index.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = _index.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(_index.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = _index.copy() + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(_index.levels, levels) + + ind2 = _index.copy() + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(_index.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = _index.copy() + inplace_return = ind2.set_levels(new_levels, level=[0, 1], + inplace=True) + assert 
inplace_return is None + assert_matching(ind2.levels, new_levels) + assert_matching(_index.levels, levels) + + # illegal level changing should not change levels + # GH 13754 + original_index = _index.copy() + for inplace in [True, False]: + with tm.assert_raises_regex(ValueError, "^On"): + _index.set_levels(['c'], level=0, inplace=inplace) + assert_matching(_index.levels, original_index.levels, + check_dtype=True) + + with tm.assert_raises_regex(ValueError, "^On"): + _index.set_labels([0, 1, 2, 3, 4, 5], level=0, + inplace=inplace) + assert_matching(_index.labels, original_index.labels, + check_dtype=True) + + with tm.assert_raises_regex(TypeError, "^Levels"): + _index.set_levels('c', level=0, inplace=inplace) + assert_matching(_index.levels, original_index.levels, + check_dtype=True) + + with tm.assert_raises_regex(TypeError, "^Labels"): + _index.set_labels(1, level=0, inplace=inplace) + assert_matching(_index.labels, original_index.labels, + check_dtype=True) + + +def test_set_labels(_index): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is possible. 
+ labels = _index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp, dtype=np.int8) + tm.assert_numpy_array_equal(act, exp) + + # label changing [w/o mutation] + ind2 = _index.set_labels(new_labels) + assert_matching(ind2.labels, new_labels) + assert_matching(_index.labels, labels) + + # label changing [w/ mutation] + ind2 = _index.copy() + inplace_return = ind2.set_labels(new_labels, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, new_labels) + + # label changing specific level [w/o mutation] + ind2 = _index.set_labels(new_labels[0], level=0) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(_index.labels, labels) + + ind2 = _index.set_labels(new_labels[1], level=1) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(_index.labels, labels) + + # label changing multiple levels [w/o mutation] + ind2 = _index.set_labels(new_labels, level=[0, 1]) + assert_matching(ind2.labels, new_labels) + assert_matching(_index.labels, labels) + + # label changing specific level [w/ mutation] + ind2 = _index.copy() + inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(_index.labels, labels) + + ind2 = _index.copy() + inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(_index.labels, labels) + + # label changing multiple levels [w/ mutation] + ind2 = _index.copy() + 
inplace_return = ind2.set_labels(new_labels, level=[0, 1], + inplace=True) + assert inplace_return is None + assert_matching(ind2.labels, new_labels) + assert_matching(_index.labels, labels) + + # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_labels = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples( + [(0, i) for i in new_labels]) + + # [w/o mutation] + result = ind.set_labels(labels=new_labels, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_labels(labels=new_labels, level=1, inplace=True) + assert result.equals(expected) + + +def test_set_levels_labels_names_bad_input(_index): + levels, labels = _index.levels, _index.labels + names = _index.names + + with tm.assert_raises_regex(ValueError, 'Length of levels'): + _index.set_levels([levels[0]]) + + with tm.assert_raises_regex(ValueError, 'Length of labels'): + _index.set_labels([labels[0]]) + + with tm.assert_raises_regex(ValueError, 'Length of names'): + _index.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + _index.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + _index.set_labels(labels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assert_raises_regex(TypeError, 'list-like'): + _index.set_names(names[0]) + + # should have equal lengths + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + _index.set_levels(levels[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'list-like'): + _index.set_levels(levels, level=0) + + # should have equal lengths + with tm.assert_raises_regex(TypeError, 'list of lists-like'): + _index.set_labels(labels[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 
'list-like'): + _index.set_labels(labels, level=0) + + # should have equal lengths + with tm.assert_raises_regex(ValueError, 'Length of names'): + _index.set_names(names[0], level=[0, 1]) + + with tm.assert_raises_regex(TypeError, 'string'): + _index.set_names(names, level=0) + + +@pytest.mark.parametrize('inplace', [True, False]) +def test_set_names_with_nlevel_1(inplace): + # GH 21149 + # Ensure that .set_names for MultiIndex with + # nlevels == 1 does not raise any errors + expected = pd.MultiIndex(levels=[[0, 1]], + labels=[[0, 1]], + names=['first']) + m = pd.MultiIndex.from_product([[0, 1]]) + result = m.set_names('first', level=0, inplace=inplace) + + if inplace: + result = m + + tm.assert_index_equal(result, expected) + + +def test_set_levels_categorical(): + # GH13854 + index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) + for ordered in [False, True]: + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, 0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], + labels=index.labels) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex(list("bacb"), + categories=cidx.categories, + ordered=cidx.ordered) + tm.assert_index_equal(result_lvl, expected_lvl) + + +def test_set_value_keeps_names(): + # motivating example from #3742 + lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] + lev2 = ['1', '2', '3'] * 2 + idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) + df = pd.DataFrame( + np.random.randn(6, 4), + columns=['one', 'two', 'three', 'four'], + index=idx) + df = df.sort_index() + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') + df.at[('grethe', '4'), 'one'] = 99.34 + assert df._is_copy is None + assert df.index.names == ('Name', 'Number') + + +def test_get_level_number_integer(_index): + _index.names = [1, 0] + assert _index._get_level_number(1) == 0 + assert _index._get_level_number(0) == 
1 + pytest.raises(IndexError, _index._get_level_number, 2) + tm.assert_raises_regex(KeyError, 'Level fourth not found', + _index._get_level_number, 'fourth') + + +def test_get_level_values(_index): + result = _index.get_level_values(0) + expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], + name='first') + tm.assert_index_equal(result, expected) + assert result.name == 'first' + + result = _index.get_level_values('first') + expected = _index.get_level_values(0) + tm.assert_index_equal(result, expected) + + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(['A', 'B']), + CategoricalIndex([1, 2, 3])], + labels=[np.array([0, 0, 0, 1, 1, 1]), + np.array([0, 1, 2, 0, 1, 2])]) + + exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) + + +def test_getitem(_index): + # scalar + assert _index[2] == ('bar', 'one') + + # slice + result = _index[2:5] + expected = _index[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = _index[[True, False, True, False, True, True]] + result2 = _index[np.array([True, False, True, False, True, True])] + expected = _index[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(_index): + sorted_idx, _ = _index.sortlevel(0) + assert sorted_idx.get_loc('baz') == slice(3, 4) + assert sorted_idx.get_loc('foo') == slice(0, 2) + + +def test_get_value_duplicates(): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + + assert index.get_loc('D') == slice(0, 3) + with pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') + + +def test_get_level_values_all_na(): + # GH 17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ['a', 
np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_int_with_na(): + # GH 17924 + arrays = [['a', 'b', 'b'], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) + + arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1]) + tm.assert_index_equal(result, expected) + + arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) + + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_unique_index(_index): + idx = _index[[0, 1, 0, 1, 1, 0, 0]] + expected = _index._shallow_copy(idx[[0, 1]]) + + for dropna in [False, True]: + result = idx._get_unique_index(dropna=dropna) + assert result.unique + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py new file 
mode 100644 index 0000000000000..716bcab00723b --- /dev/null +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- + + +from datetime import timedelta + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Index, MultiIndex +from pandas.compat import lrange +from pandas.core.indexes.base import InvalidIndexError +from pandas.util.testing import assert_almost_equal + + +def test_slice_locs_partial(_index): + sorted_idx, _ = _index.sortlevel(0) + + result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) + assert result == (1, 5) + + result = sorted_idx.slice_locs(None, ('qux', 'one')) + assert result == (0, 5) + + result = sorted_idx.slice_locs(('foo', 'two'), None) + assert result == (1, len(sorted_idx)) + + result = sorted_idx.slice_locs('bar', 'baz') + assert result == (2, 4) + + +def test_slice_locs(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30), + df.index[15] - timedelta(seconds=30))) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + +def test_slice_locs_with_type_mismatch(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, (1, 3)) + tm.assert_raises_regex(TypeError, '^Level type mismatch', + idx.slice_locs, + df.index[5] + timedelta( + seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with 
tm.assert_raises_regex(TypeError, '^Level type mismatch'): + idx.slice_locs(df.index[1], (16, "a")) + + +def test_slice_locs_not_sorted(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " + "MultiIndex lexsort depth", + index.slice_locs, (1, 0, 1), (2, 1, 0)) + + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + +def test_slice_locs_not_contained(): + # some searchsorted action + + index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], + labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], + [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) + + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(1, 5) + assert result == (3, 6) + + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(2, 5) + assert result == (3, 6) + + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) + + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) + + +def test_get_loc(_index): + assert _index.get_loc(('foo', 'two')) == 1 + assert _index.get_loc(('baz', 'two')) == 3 + pytest.raises(KeyError, _index.get_loc, ('bar', 'two')) + pytest.raises(KeyError, _index.get_loc, 'quux') + + pytest.raises(NotImplementedError, _index.get_loc, 'foo', + method='nearest') + + # 3 levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + pytest.raises(KeyError, index.get_loc, (1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + +def test_get_loc_duplicates(): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + 
assert result == expected + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert rs == xp + + +def test_get_loc_level(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + pytest.raises(KeyError, index.get_loc_level, (2, 2)) + + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) +def test_get_loc_nan(level, null_val): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, null_val], dtype=type(null_val)) + key[level] = null_val + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_missing_nan(): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + + +@pytest.mark.parametrize('dtype1', [int, float, bool, str]) +@pytest.mark.parametrize('dtype2', [int, float, bool, str]) +def test_get_loc_multiple_dtypes(dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + 
np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) +def test_get_loc_implicit_cast(level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_cast_bool(): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) + + +def test_get_indexer(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='backfill') + assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + 
assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(lrange(10) + lrange(10)) + idx2 = Index(lrange(20)) + + msg = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, msg): + idx1.get_indexer(idx2) + + +def test_get_indexer_nearest(): + midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='nearest') + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='pad', tolerance=2) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 0000000000000..c52f71934bc6e --- /dev/null +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- + +import re + + +import pytest + +import numpy as np + +import pandas as pd + +from pandas import DataFrame, MultiIndex, date_range +from pandas.compat import long, lrange, range +from pandas.errors import PerformanceWarning, UnsortedIndexError +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas.util.testing as tm + + +def test_labels_dtypes(): + + # GH 8456 + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + assert i.labels[0].dtype == 'int8' + assert i.labels[1].dtype == 'int8' + + i = MultiIndex.from_product([['a'], range(40)]) + assert i.labels[1].dtype == 'int8' + i = MultiIndex.from_product([['a'], range(400)]) + assert i.labels[1].dtype == 'int16' + i = MultiIndex.from_product([['a'], range(40000)]) + assert i.labels[1].dtype == 'int32' + + i = pd.MultiIndex.from_product([['a'], range(1000)]) + assert (i.labels[0] >= 0).all() + assert (i.labels[1] >= 0).all() + + +def test_values_boxed(): + tuples = [(1, 
pd.Timestamp('2000-01-01')), (2, pd.NaT), + (3, pd.Timestamp('2000-01-03')), + (1, pd.Timestamp('2000-01-04')), + (2, pd.Timestamp('2000-01-02')), + (3, pd.Timestamp('2000-01-03'))] + result = pd.MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10 ** 18, 10 ** 18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def test_consistency(): + # need to construct an overflow + major_axis = lrange(70000) + 
minor_axis = lrange(10) + + major_labels = np.arange(70000) + minor_labels = np.repeat(lrange(10), 7000) + + # the fact that is works means it's consistent + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + # inconsistent + major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + assert not index.is_unique + + +def test_hash_collisions(): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], + names=['one', 'two']) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange( + len(index), dtype='intp')) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + assert not result + result = i[1:2].equals(i[1]) + assert not result + + +def test_dims(): + pass + + +def take_invalid_kwargs(): + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + +def test_isna_behavior(_index): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + with pytest.raises(NotImplementedError): + pd.isna(_index) + + +def 
test_large_multiindex_error(): + # GH12527 + df_below_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]), + columns=['dest']) + with pytest.raises(KeyError): + df_below_1000000.loc[(-1, 0), 'dest'] + with pytest.raises(KeyError): + df_below_1000000.loc[(3, 0), 'dest'] + df_above_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), + columns=['dest']) + with pytest.raises(KeyError): + df_above_1000000.loc[(-1, 0), 'dest'] + with pytest.raises(KeyError): + df_above_1000000.loc[(3, 0), 'dest'] + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + +def test_million_record_attribute_error(): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame({'a': r, 'b': r}, + index=pd.MultiIndex.from_tuples([(x, x) for x in r])) + + with tm.assert_raises_regex(AttributeError, + "'Series' object has no attribute 'foo'"): + df['a'].foo() + + +def test_can_hold_identifiers(_index): + idx = _index + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(_index): + levels, labels = _index.levels, _index.labels + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile('does not support mutable operations') + with tm.assert_raises_regex(TypeError, mutable_regex): + levels[0] = levels[0] + with tm.assert_raises_regex(TypeError, 
mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with tm.assert_raises_regex(TypeError, mutable_regex): + labels[0] = labels[0] + with tm.assert_raises_regex(TypeError, mutable_regex): + labels[0][0] = labels[0][0] + # and for names + names = _index.names + with tm.assert_raises_regex(TypeError, mutable_regex): + names[0] = names[0] + + +def test_boolean_context_compat2(): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) + common = i1.intersection(i2) + + def f(): + if common: + pass + + tm.assert_raises_regex(ValueError, 'The truth value of a', f) + + +def test_inplace_mutation_resets_values(): + levels = [['a', 'b', 'c'], [4]] + levels2 = [[1, 2, 3], ['a']] + labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, labels=labels) + mi2 = MultiIndex(levels=levels2, labels=labels) + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + assert mi1._tuples is not None + + # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should kill _tuples + mi1.set_levels(levels2, inplace=True) + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + new_values = mi2.set_labels(labels2).values + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc + mi2.set_labels(labels2, 
inplace=True) + tm.assert_almost_equal(mi2.values, new_values) + + +def test_level_setting_resets_attributes(): + ind = pd.MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert ind.is_monotonic + ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) + # if this fails, probably didn't reset the cache correctly. + assert not ind.is_monotonic + + +def test_partial_string_timestamp_multiindex(): + # GH10331 + dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H') + abc = ['a', 'b', 'c'] + ix = pd.MultiIndex.from_product([dr, abc]) + df = pd.DataFrame({'c1': range(0, 15)}, index=ix) + idx = pd.IndexSlice + + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + + # partial string matching on a single index + for df_swap in (df.swaplevel(), + df.swaplevel(0), + df.swaplevel(0, 1)): + df_swap = df_swap.sort_index() + just_a = df_swap.loc['a'] + result = just_a.loc['2016-01-01'] + expected = df.loc[idx[:, 'a'], :].iloc[0:2] + expected.index = expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + # indexing with IndexSlice + result = df.loc[idx['2016-01-01':'2016-02-01', :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # Even though this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. 
+ with pytest.raises(KeyError): + df['2016-01-01'] + + # partial string match on year only + result = df.loc['2016'] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc['2016-01-01'] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc['2016-01-02 12'] + expected = df.iloc[9:12] + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[idx[:, '2016-01-02'], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + result = df.loc[('2016-01-01', 'a'), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) + with pytest.raises(KeyError): + df_swap.loc['2016-01-01'] + + # GH12685 (partial string with daily resolution or below) + dr = date_range('2013-01-01', periods=100, freq='D') + ix = MultiIndex.from_product([dr, ['a', 'b']]) + df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix) + + result = df.loc[idx['2013-03':'2013-03', :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + foo = pd.DataFrame(np.arange(100).reshape((10, 10))) + bar = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1) + df.index.names = ['fizz', 'buzz'] + + str(df) + expected = pd.DataFrame({'bar': np.arange(100), + 'foo': np.arange(100)}, + index=pd.MultiIndex.from_product( + [range(10), range(10)], + names=['fizz', 'buzz'])) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values('fizz') + expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10) + tm.assert_index_equal(result, expected) + + result = 
df.index.get_level_values('buzz') + expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz') + tm.assert_index_equal(result, expected) + + +def test_unsortedindex(): + # GH 11897 + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, + columns=['one', 'two']) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + with pytest.raises(UnsortedIndexError): + df.loc(axis=0)['z', slice('a')] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)['z', :]) == 2 + + with pytest.raises(KeyError): + df.loc(axis=0)['q', :] + + +def test_unsortedindex_doc_examples(): + # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + + dfm = dfm.set_index(['jim', 'joe']) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, 'z')] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, 'y'):(1, 'z')] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, 'z')] + dfm.loc[(0, 'y'):(1, 'z')] + + assert dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 2 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 0000000000000..5ea16d6a1243b --- /dev/null +++ b/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Index, MultiIndex + + +@pytest.mark.parametrize('other', + [Index(['three', 'one', 'two']), + Index(['one']), + Index(['one', 'three'])]) +def test_join_level(_index, other, join_type): + 
join_index, lidx, ridx = other.join(_index, how=join_type, + level='second', + return_indexers=True) + + exp_level = other.join(_index.levels[1], how=join_type) + assert join_index.levels[0].equals(_index.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array( + [x[1] in exp_level for x in _index], dtype=bool) + exp_values = _index.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ('outer', 'inner'): + join_index2, ridx2, lidx2 = \ + _index.join(other, how=join_type, level='second', + return_indexers=True) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(_index): + # some corner cases + idx = Index(['three', 'one', 'two']) + result = idx.join(_index, level='second') + assert isinstance(result, MultiIndex) + + tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", + _index.join, _index, level=1) + + +def test_join_self(_index, join_type): + res = _index + joined = res.join(res, how=join_type) + assert res is joined + + +def test_join_multi(): + # GH 10665 + midx = pd.MultiIndex.from_product( + [np.arange(4), np.arange(4)], names=['a', 'b']) + idx = pd.Index([1, 2, 5], name='b') + + # inner + jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) + exp_idx = pd.MultiIndex.from_product( + [np.arange(4), [1, 2]], names=['a', 'b']) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, 
exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) + exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, + 1, -1], dtype=np.intp) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 0000000000000..3baa420b0be77 --- /dev/null +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +from pandas import Index, MultiIndex + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), + np.arange(10)], names=['one', 'two']) + assert i.is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values).is_monotonic + assert not Index(i.values)._is_strictly_monotonic_increasing + + # string ordering + i = MultiIndex(levels=[['foo', 'bar', 'baz', 
'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not i.is_monotonic + assert not Index(i.values).is_monotonic + assert not i._is_strictly_monotonic_increasing + assert not Index(i.values)._is_strictly_monotonic_increasing + + i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], + ['mom', 'next', 'zenith']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', + 'nl0000289783', + 'nl0000289965', 'nl0000301109']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic + assert not i._is_strictly_monotonic_increasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic + assert Index(i.values).is_monotonic + assert i._is_strictly_monotonic_increasing + assert Index(i.values)._is_strictly_monotonic_increasing + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product([np.arange(9, -1, -1), + np.arange(9, -1, -1)], + names=['one', 'two']) + assert i.is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10), + np.arange(10, 0, -1)], + names=['one', 'two']) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([np.arange(10, 0, -1), + np.arange(10)], names=['one', 'two']) + assert not i.is_monotonic_decreasing + 
assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']]) + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + # string ordering + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['three', 'two', 'one']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert not i.is_monotonic_decreasing + assert not Index(i.values).is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + assert not Index(i.values)._is_strictly_monotonic_decreasing + + i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], + ['zenith', 'next', 'mom']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + assert i.is_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965', + 'nl0000289783', 'lu0197800237', + 'gb00b03mlx29']], + labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=['household_id', 'asset_id']) + + assert not i.is_monotonic_decreasing + assert not i._is_strictly_monotonic_decreasing + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing + assert Index(i.values).is_monotonic_decreasing + assert i._is_strictly_monotonic_decreasing + assert Index(i.values)._is_strictly_monotonic_decreasing + + +def test_is_strictly_monotonic_increasing(): + idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert 
idx.is_monotonic_increasing + assert not idx._is_strictly_monotonic_increasing + + +def test_is_strictly_monotonic_decreasing(): + idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], + labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) + assert idx.is_monotonic_decreasing + assert not idx._is_strictly_monotonic_decreasing diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py new file mode 100644 index 0000000000000..a5be2b2705525 --- /dev/null +++ b/pandas/tests/indexes/multi/test_names.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- + + +import pandas as pd +import pandas.util.testing as tm +from pandas import MultiIndex + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_slice_keep_name(): + x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], + names=['x', 'y']) + assert x[1:].names == x.names + + +def test_index_name_retained(): + # GH9857 + result = pd.DataFrame({'x': [1, 2, 6], + 'y': [2, 2, 8], + 'z': [-5, 0, 5]}) + result = result.set_index('z') + result.loc[10] = [9, 10] + df_expected = pd.DataFrame({'x': [1, 2, 6, 9], + 'y': [2, 2, 8, 10], + 'z': [-5, 0, 5, 10]}) + df_expected = df_expected.set_index('z') + tm.assert_frame_equal(result, df_expected) + + +def test_changing_names(_index): + + # names should be applied to levels + level_names = [level.name for level in _index.levels] + check_level_names(_index, _index.names) + + view = _index.view() + copy = _index.copy() + shallow_copy = _index._shallow_copy() + + # changing names should change level names on object + new_names = [name + "a" for name in _index.names] + _index.names = new_names + check_level_names(_index, new_names) + + # but not on copies + check_level_names(view, level_names) + check_level_names(copy, level_names) + check_level_names(shallow_copy, level_names) + + # and copies shouldn't change original + shallow_copy.names = [name + "c" for name in shallow_copy.names] + 
check_level_names(_index, new_names) + + +def test_take_preserve_name(_index): + taken = _index.take([3, 0, 1]) + assert taken.names == _index.names + + +def test_copy_names(): + # Check that adding a "names" parameter to the copy is honored + # GH14302 + multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) + multi_idx1 = multi_idx.copy() + + assert multi_idx.equals(multi_idx1) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx1.names == ['MyName1', 'MyName2'] + + multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) + + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx2.names == ['NewName1', 'NewName2'] + + multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) + + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ['MyName1', 'MyName2'] + assert multi_idx3.names == ['NewName1', 'NewName2'] + + +def test_names(_index, index_names): + + # names are assigned in setup + names = index_names + level_names = [level.name for level in _index.levels] + assert names == level_names + + # setting bad names on existing + index = _index + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", + list(index.names) + ["third"]) + tm.assert_raises_regex(ValueError, "^Length of names", + setattr, index, "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = _index.levels + major_labels, minor_labels = _index.labels + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) + + # names are assigned + index.names = ["a", "b"] + ind_names = list(index.names) + level_names = [level.name for level in index.levels] + assert 
ind_names == level_names diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py new file mode 100644 index 0000000000000..dc2df16f0bdbd --- /dev/null +++ b/pandas/tests/indexes/multi/test_operations.py @@ -0,0 +1,900 @@ +# -*- coding: utf-8 -*- + +import warnings +from itertools import product + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import DataFrame, Index, MultiIndex, date_range, period_range +from pandas.compat import PYPY, lrange, lzip, range, u +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.util.testing import assert_copy + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_difference(_index): + + first = _index + result = first.difference(_index[-3:]) + expected = MultiIndex.from_tuples(sorted(_index[:-3].values), + sortorder=0, + names=_index.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: reflexive + result = _index.difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: superset + result = _index[-3:].difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: degenerate + result = _index[:0].difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # names not the same + chunklet = _index[-3:] + chunklet.names = ['foo', 'baz'] + result = first.difference(chunklet) + assert result.names == (None, None) + + # empty, but non-equal + result = _index.difference(_index.sortlevel(1)[0]) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values) + assert result.equals(first[:0]) + + # name from 
empty array + result = first.difference([]) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([('foo', 'one')]) + expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( + 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) + expected.names = first.names + assert first.names == result.names + tm.assert_raises_regex(TypeError, "other must be a MultiIndex " + "or a list of tuples", + first.difference, [1, 2, 3, 4, 5]) + + +def test_union(_index): + piece1 = _index[:5][::-1] + piece2 = _index[3:] + + the_union = piece1 | piece2 + + tups = sorted(_index.values) + expected = MultiIndex.from_tuples(tups) + + assert the_union.equals(expected) + + # corner case, pass self or empty thing: + the_union = _index.union(_index) + assert the_union is _index + + the_union = _index.union(_index[:0]) + assert the_union is _index + + # won't work in python 3 + # tuples = _index.values + # result = _index[:4] | tuples[4:] + # assert result.equals(tuples) + + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) + + # result = other.union(_index) + # assert ('foo', 'one') in result + # assert 'B' in result + + # result2 = _index.union(other) + # assert result.equals(result2) + + +def test_intersection(_index): + piece1 = _index[:5][::-1] + piece2 = _index[3:] + + the_int = piece1 & piece2 + tups = sorted(_index[3:5].values) + expected = MultiIndex.from_tuples(tups) + assert the_int.equals(expected) + + # corner case, pass self + the_int = _index.intersection(_index) + assert the_int is _index + + # empty intersection: disjoint + empty = _index[:2] & _index[2:] + expected = _index[:0] + assert empty.equals(expected) + + # can't do in python 3 + # tuples = _index.values + # result = _index & tuples + # assert result.equals(tuples) + + +def test_insert(_index): + # key contained in all levels + new_index = _index.insert(0, ('bar', 'two')) + assert 
new_index.equal_levels(_index) + assert new_index[0] == ('bar', 'two') + + # key not contained in all levels + new_index = _index.insert(0, ('abc', 'three')) + + exp0 = Index(list(_index.levels[0]) + ['abc'], name='first') + tm.assert_index_equal(new_index.levels[0], exp0) + + exp1 = Index(list(_index.levels[1]) + ['three'], name='second') + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ('abc', 'three') + + # key wrong length + msg = "Item must have length equal to number of levels" + with tm.assert_raises_regex(ValueError, msg): + _index.insert(0, ('foo2',)) + + left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], + columns=['1st', '2nd', '3rd']) + left.set_index(['1st', '2nd'], inplace=True) + ts = left['3rd'].copy(deep=True) + + left.loc[('b', 'x'), '3rd'] = 2 + left.loc[('b', 'a'), '3rd'] = -1 + left.loc[('b', 'b'), '3rd'] = 3 + left.loc[('a', 'x'), '3rd'] = 4 + left.loc[('a', 'w'), '3rd'] = 5 + left.loc[('a', 'a'), '3rd'] = 6 + + ts.loc[('b', 'x')] = 2 + ts.loc['b', 'a'] = -1 + ts.loc[('b', 'b')] = 3 + ts.loc['a', 'x'] = 4 + ts.loc[('a', 'w')] = 5 + ts.loc['a', 'a'] = 6 + + right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], + ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], + ['a', 'w', 5], ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) + right.set_index(['1st', '2nd'], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right['3rd']) + + # GH9250 + idx = [('test1', i) for i in range(5)] + \ + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] + + left = pd.Series(np.linspace(0, 10, 11), + pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[('test', 17)] = 11 + left.loc[('test', 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), + pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_is_all_dates(_index): + assert not _index.is_all_dates + + 
+def test_is_numeric(_index): + # MultiIndex is never numeric + assert not _index.is_numeric() + + +def test_bounds(_index): + _index._bounds + + +def test_equals_multi(_index): + assert _index.equals(_index) + assert not _index.equals(_index.values) + assert _index.equals(Index(_index.values)) + + assert _index.equal_levels(_index) + assert not _index.equals(_index[:-1]) + assert not _index.equals(_index[-1]) + + # different number of levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3]) + minor_labels = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not _index.equals(index) + assert not _index.equal_levels(index) + + # some of the labels are different + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not _index.equals(index) + + +def test_identical(_index): + mi = _index.copy() + mi2 = _index.copy() + assert mi.identical(mi2) + + mi = mi.set_names(['new1', 'new2']) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(['new1', 'new2']) + assert mi.identical(mi2) + + mi3 = Index(mi.tolist(), names=mi.names) + mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) + assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_append(_index): + 
result = _index[:3].append(_index[3:]) + assert result.equals(_index) + + foos = [_index[:1], _index[1:3], _index[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(_index) + + # empty + result = _index.append([]) + assert result.equals(_index) + + +def test_groupby(_index): + groups = _index.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = _index.get_values().tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = _index.groupby(_index) + exp = {key: [key] for key in _index} + tm.assert_dict_equal(groups, exp) + + +def test_equals_operator(_index): + # GH9785 + assert (_index == _index).all() + + +def test_truncate(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + result = index.truncate(before=1) + assert 'foo' not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + + # after < before + pytest.raises(ValueError, index.truncate, 3, 1) + + +def test_where(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + + def f(): + i.where(True) + + pytest.raises(NotImplementedError, f) + + +def test_where_array_like(): + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + klasses = [list, tuple, np.array, pd.Series] + cond = [False, True] + + for klass in klasses: + def f(): + return i.where(klass(cond)) + pytest.raises(NotImplementedError, f) + + +def test_reorder_levels(_index): + # this blows up + tm.assert_raises_regex(IndexError, '^Too many levels', + _index.reorder_levels, [2, 1, 0]) + + +def test_astype(_index): + expected = _index.copy() + actual = _index.astype('O') + assert_copy(actual.levels, expected.levels) 
+ assert_copy(actual.labels, expected.labels) + check_level_names(actual, expected.names) + + with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): + _index.astype(np.dtype(int)) + + +@pytest.mark.parametrize('ordered', [True, False]) +def test_astype_category(_index, ordered): + # GH 18630 + msg = '> 1 ndim Categorical are not supported at this time' + with tm.assert_raises_regex(NotImplementedError, msg): + _index.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with tm.assert_raises_regex(NotImplementedError, msg): + _index.astype('category') + + +@pytest.mark.parametrize('first_type,second_type', [ + ('int64', 'int64'), + ('datetime64[D]', 'str')]) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame(dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 1 << 10, size).astype(second_type), + third=rng.rand(size))) + df = df.groupby(['first', 'second']).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(['first', 'second']).index + tm.assert_index_equal(result, expected) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + with tm.assert_produces_warning(FutureWarning): + result = m.repeat(n=reps) + 
tm.assert_index_equal(result, expected) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(['foo', 'bar']) + + m = MultiIndex.from_product([ + numbers, names], names=names) + expected = MultiIndex.from_product([ + numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex( + ValueError, msg, np.repeat, m, reps, axis=1) + + +def test_is_(): + mi = MultiIndex.from_tuples(lzip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert mi.is_(mi.set_names(["C", "D"])) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + assert mi.is_(mi2) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([lrange(10), lrange(10)]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + mi4.set_levels([lrange(10), lrange(10)], inplace=True) + assert not mi4.is_(mi3) + mi5 = mi.view() + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range('2011-01-01', freq='M', periods=3, ) + dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') + pi = period_range('2011-01', freq='M', periods=3) + + mi = MultiIndex.from_arrays([[1, 2, 3], + [1.1, np.nan, 3.3], + ['a', 'b', 'c'], + dti, dti_tz, pi]) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ['a', 'b', 'c', 'a', 'b', 'c'], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi)]) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], + ['x', 'y', 
'z'], ['x', 'y', 'z'], + ['x', 'y', 'z'], ['x', 'y', 'z']]) + + res = mi.append(other) + exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], + [1.1, np.nan, 3.3, 'x', 'y', 'z'], + ['a', 'b', 'c', 'x', 'y', 'z'], + dti.append(pd.Index(['x', 'y', 'z'])), + dti_tz.append(pd.Index(['x', 'y', 'z'])), + pi.append(pd.Index(['x', 'y', 'z']))]) + tm.assert_index_equal(res, exp) + + +def test_take_fill_value(): + # GH 12631 + vals = [['A', 'B'], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] + idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + (np.nan, pd.NaT)] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + exp_vals = [('A', pd.Timestamp('2011-01-02')), + ('A', pd.Timestamp('2011-01-01')), + ('B', pd.Timestamp('2011-01-02'))] + expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) + tm.assert_index_equal(result, expected) + + msg = ('When allow_fill=True and fill_value is not None, ' + 'all indices must be >= -1') + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assert_raises_regex(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +def test_iter(_index): + result = list(_index) + expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] + 
assert result == expected + + +def test_sub(_index): + + first = _index + + # - now raises (previously was set op difference) + with pytest.raises(TypeError): + first - _index[-3:] + with pytest.raises(TypeError): + _index[-3:] - first + with pytest.raises(TypeError): + _index[-3:] - first.tolist() + with pytest.raises(TypeError): + first.tolist() - _index[-3:] + + +def test_nlevels(_index): + assert _index.nlevels == 2 + + +def test_argsort(_index): + result = _index.argsort() + expected = _index.values.argsort() + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize('level0', [['a', 'd', 'b'], + ['a', 'd', 'b', 'unused']]) +@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'], + ['w', 'x', 'y', 'z', 'unused']]) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = pd.MultiIndex(levels=[level0, level1], + labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert('unused' not in result.levels[level]) + + +@pytest.mark.parametrize('names', [None, ['first', 'second']]) +def test_unique(names): + mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], + names=names) + + res = mi.unique() + exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], + names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) + tm.assert_index_equal(res, exp) + + # GH #20568 - empty MI + mi = pd.MultiIndex.from_arrays([[], []], names=names) + res = mi.unique() + tm.assert_index_equal(mi, res) + + +def test_unique_datetimelike(): + idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', 
+ '2015-01-01', 'NaT', 'NaT']) + idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', + '2015-01-02', 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() + + eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) + eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', + 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) + tm.assert_index_equal(result, exp) + + +@pytest.mark.parametrize('level', [0, 'first', 1, 'second']) +def test_unique_level(_index, level): + # GH #17896 - with level= argument + result = _index.unique(level=level) + expected = _index.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], + names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + # With empty MI + mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = pd.MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) + + +@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + 
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + + +def test_isin(): + values = [('foo', 2), ('bar', 3), ('quux', 4)] + + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + assert len(result) == 0 + assert result.dtype == np.bool_ + + +@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_not_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, False])) + + +def test_isin_level_kwarg(): + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + + vals_0 = ['foo', 'bar', 'quux'] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + pytest.raises(IndexError, idx.isin, vals_0, level=5) + pytest.raises(IndexError, idx.isin, vals_0, level=-5) + + pytest.raises(KeyError, idx.isin, vals_0, level=1.0) + pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) + pytest.raises(KeyError, idx.isin, vals_1, level='A') + + idx.names = ['A', 'B'] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) + + pytest.raises(KeyError, idx.isin, vals_1, level='C') + + +def test_duplicate_multiindex_labels(): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a 
ValueError + with pytest.raises(ValueError): + ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], + [1, 2, 1, 2, 3]]) + with pytest.raises(ValueError): + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) + + +@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], + ['1', 'a', '1']]) +def test_duplicate_level_names(names): + # GH18872 + pytest.raises(ValueError, pd.MultiIndex.from_product, + [[0, 1]] * 3, names=names) + + # With .rename() + mi = pd.MultiIndex.from_product([[0, 1]] * 3) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names) + + # With .rename(., level=) + mi.rename(names[0], level=1, inplace=True) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names[:2], level=[0, 2]) + + +def test_duplicate_meta_data(): + # GH 10115 + index = MultiIndex( + levels=[[0, 1], [0, 1, 2]], + labels=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) + + for idx in [index, + index.set_names([None, None]), + index.set_names([None, 'Num']), + index.set_names(['Upper', 'Num']), ]: + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names + + +def test_duplicates(_index): + assert not _index.has_duplicates + assert _index.append(_index).has_duplicates + + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) + assert index.has_duplicates + + # GH 9075 + t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), + (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), + (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), + (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), + (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), + (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), + (u('x'), u('out'), u('z'), 17, u('y'), u('in'), 
u('z'), 160), + (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), + (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), + (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), + (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), + (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), + (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), + (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), + (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), + (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), + (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), + (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] + + index = pd.MultiIndex.from_tuples(t) + assert not index.has_duplicates + + # handle int64 overflow if possible + def check(nlevels, with_nulls): + labels = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject some null values + labels[500] = -1 # common nan value + labels = [labels.copy() for i in range(nlevels)] + for i in range(nlevels): + labels[i][500 + i - nlevels // 2] = -1 + + labels += [np.array([-1, 1]).repeat(500)] + else: + labels = [labels] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + index = MultiIndex(levels=levels, labels=labels) + assert not index.has_duplicates + + # with a dup + if with_nulls: + def f(a): + return np.insert(a, 1000, a[0]) + labels = list(map(f, labels)) + index = MultiIndex(levels=levels, labels=labels) + else: + values = index.values.tolist() + index = MultiIndex.from_tuples(values + [values[0]]) + + assert index.has_duplicates + + # no overflow + check(4, False) + check(4, True) + + # overflow possible + check(8, False) + check(8, True) + + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + labels = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, labels=labels) + 
+ for keep in ['first', 'last', False]: + left = mi.duplicated(keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) + tm.assert_numpy_array_equal(left, right) + + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + 2, dtype='bool')) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + lab = product(range(-1, n), range(-1, m)) + mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], + labels=np.random.permutation(list(lab)).T) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + len(mi), dtype='bool')) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 0000000000000..071c2c54196bd --- /dev/null +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +from pandas import Index, MultiIndex + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_reindex(_index): + result, indexer = _index.reindex(list(_index[:4])) + assert isinstance(result, MultiIndex) + check_level_names(result, _index[:4].names) + + result, indexer = _index.reindex(list(_index)) + assert isinstance(result, MultiIndex) + assert indexer is None + check_level_names(result, _index.names) + + +def test_reindex_level(_index): + idx = 
Index(['one']) + + target, indexer = _index.reindex(idx, level='second') + target2, indexer2 = idx.reindex(_index, level='second') + + exp_index = _index.join(idx, level='second', how='right') + exp_index2 = _index.join(idx, level='second', how='left') + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) + + tm.assert_raises_regex(TypeError, "Fill method not supported", + _index.reindex, _index, + method='pad', level='second') + + tm.assert_raises_regex(TypeError, "Fill method not supported", + idx.reindex, idx, method='bfill', + level='first') + + +def test_reindex_preserves_names_when_target_is_list_or_ndarray(_index): + # GH6552 + idx = _index.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ['foo', 'bar'] + assert idx.reindex([])[0].names == ['foo', 'bar'] + assert idx.reindex(np.array([]))[0].names == ['foo', 'bar'] + assert idx.reindex(target.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(target.values)[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar'] + assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar'] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + 
names=['foo', 'bar']) + assert idx.reindex([], level=0)[0].names == ['foo', 'bar'] + assert idx.reindex([], level=1)[0].names == ['foo', 'bar'] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 0000000000000..7ced55951feb7 --- /dev/null +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + + +from pandas import MultiIndex + + +def test_sortlevel(_index): + import random + + tuples = list(_index) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), + ('foo', 'one'), ('baz', 'two'), ('qux', 'one')] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + 
sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py deleted file mode 100644 index b1fb5f01862ae..0000000000000 --- a/pandas/tests/indexes/test_multi.py +++ /dev/null @@ -1,3342 +0,0 @@ -# -*- coding: utf-8 -*- - -import re -import warnings - -from datetime import timedelta -from itertools import product - -import pytest - -import numpy as np - -import pandas as pd - -from pandas import (CategoricalIndex, Categorical, DataFrame, Index, - MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY -from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -from pandas._libs.tslib import Timestamp - -import pandas.util.testing as tm - -from pandas.util.testing import assert_almost_equal, assert_copy - -from .common import Base - - -class TestMultiIndex(Base): - _holder = MultiIndex - _compat_props = ['shape', 'ndim', 'size'] - - def setup_method(self, method): - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - self.index_names = ['first', 'second'] - self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels - ], names=self.index_names, - verify_integrity=False)) - self.setup_indices() - - def create_index(self): - return self.index - - def test_can_hold_identifiers(self): - idx = self.create_index() - key = idx[0] - assert 
idx._can_hold_identifiers_and_holds_name(key) is True - - def test_boolean_context_compat2(self): - - # boolean context compat - # GH7897 - i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) - common = i1.intersection(i2) - - def f(): - if common: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) - - def test_labels_dtypes(self): - - # GH 8456 - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - assert i.labels[0].dtype == 'int8' - assert i.labels[1].dtype == 'int8' - - i = MultiIndex.from_product([['a'], range(40)]) - assert i.labels[1].dtype == 'int8' - i = MultiIndex.from_product([['a'], range(400)]) - assert i.labels[1].dtype == 'int16' - i = MultiIndex.from_product([['a'], range(40000)]) - assert i.labels[1].dtype == 'int32' - - i = pd.MultiIndex.from_product([['a'], range(1000)]) - assert (i.labels[0] >= 0).all() - assert (i.labels[1] >= 0).all() - - def test_where(self): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - - def f(): - i.where(True) - - pytest.raises(NotImplementedError, f) - - def test_where_array_like(self): - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - klasses = [list, tuple, np.array, pd.Series] - cond = [False, True] - - for klass in klasses: - def f(): - return i.where(klass(cond)) - pytest.raises(NotImplementedError, f) - - def test_repeat(self): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, names.repeat(reps)], names=names) - tm.assert_index_equal(m.repeat(reps), expected) - - with tm.assert_produces_warning(FutureWarning): - result = m.repeat(n=reps) - tm.assert_index_equal(result, expected) - - def test_numpy_repeat(self): - reps = 2 - numbers = [1, 2, 3] - names = np.array(['foo', 'bar']) - - m = MultiIndex.from_product([ - numbers, names], names=names) - expected = MultiIndex.from_product([ - numbers, 
names.repeat(reps)], names=names) - tm.assert_index_equal(np.repeat(m, reps), expected) - - msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, m, reps, axis=1) - - def test_set_name_methods(self): - # so long as these are synonyms, we don't need to test set_names - assert self.index.rename == self.index.set_names - new_names = [name + "SUFFIX" for name in self.index_names] - ind = self.index.set_names(new_names) - assert self.index.names == self.index_names - assert ind.names == new_names - with tm.assert_raises_regex(ValueError, "^Length"): - ind.set_names(new_names + new_names) - new_names2 = [name + "SUFFIX2" for name in new_names] - res = ind.set_names(new_names2, inplace=True) - assert res is None - assert ind.names == new_names2 - - # set names for specific level (# GH7792) - ind = self.index.set_names(new_names[0], level=0) - assert self.index.names == self.index_names - assert ind.names == [new_names[0], self.index_names[1]] - - res = ind.set_names(new_names2[0], level=0, inplace=True) - assert res is None - assert ind.names == [new_names2[0], self.index_names[1]] - - # set names for multiple levels - ind = self.index.set_names(new_names, level=[0, 1]) - assert self.index.names == self.index_names - assert ind.names == new_names - - res = ind.set_names(new_names2, level=[0, 1], inplace=True) - assert res is None - assert ind.names == new_names2 - - @pytest.mark.parametrize('inplace', [True, False]) - def test_set_names_with_nlevel_1(self, inplace): - # GH 21149 - # Ensure that .set_names for MultiIndex with - # nlevels == 1 does not raise any errors - expected = pd.MultiIndex(levels=[[0, 1]], - labels=[[0, 1]], - names=['first']) - m = pd.MultiIndex.from_product([[0, 1]]) - result = m.set_names('first', level=0, inplace=inplace) - - if inplace: - result = m - - tm.assert_index_equal(result, expected) - - def test_set_levels_labels_directly(self): - # setting levels/labels directly raises AttributeError - - 
levels = self.index.levels - new_levels = [[lev + 'a' for lev in level] for level in levels] - - labels = self.index.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - with pytest.raises(AttributeError): - self.index.levels = new_levels - - with pytest.raises(AttributeError): - self.index.labels = new_labels - - def test_set_levels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. - levels = self.index.levels - new_levels = [[lev + 'a' for lev in level] for level in levels] - - def assert_matching(actual, expected, check_dtype=False): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp) - tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) - - # level changing [w/o mutation] - ind2 = self.index.set_levels(new_levels) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, new_levels) - - # level changing specific level [w/o mutation] - ind2 = self.index.set_levels(new_levels[0], level=0) - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.set_levels(new_levels[1], level=1) - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/o mutation] - ind2 = self.index.set_levels(new_levels, level=[0, 1]) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing specific level [w/ 
mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, level=[0, 1], - inplace=True) - assert inplace_return is None - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # illegal level changing should not change levels - # GH 13754 - original_index = self.index.copy() - for inplace in [True, False]: - with tm.assert_raises_regex(ValueError, "^On"): - self.index.set_levels(['c'], level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels, - check_dtype=True) - - with tm.assert_raises_regex(ValueError, "^On"): - self.index.set_labels([0, 1, 2, 3, 4, 5], level=0, - inplace=inplace) - assert_matching(self.index.labels, original_index.labels, - check_dtype=True) - - with tm.assert_raises_regex(TypeError, "^Levels"): - self.index.set_levels('c', level=0, inplace=inplace) - assert_matching(self.index.levels, original_index.levels, - check_dtype=True) - - with tm.assert_raises_regex(TypeError, "^Labels"): - self.index.set_labels(1, level=0, inplace=inplace) - assert_matching(self.index.labels, original_index.labels, - check_dtype=True) - - def test_set_labels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. 
- labels = self.index.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - def assert_matching(actual, expected): - # avoid specifying internal representation - # as much as possible - assert len(actual) == len(expected) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp, dtype=np.int8) - tm.assert_numpy_array_equal(act, exp) - - # label changing [w/o mutation] - ind2 = self.index.set_labels(new_labels) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, new_labels) - - # label changing specific level [w/o mutation] - ind2 = self.index.set_labels(new_labels[0], level=0) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.set_labels(new_labels[1], level=1) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(self.index.labels, labels) - - # label changing multiple levels [w/o mutation] - ind2 = self.index.set_labels(new_labels, level=[0, 1]) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing specific level [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(self.index.labels, labels) - - # label changing multiple 
levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], - inplace=True) - assert inplace_return is None - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing for levels of different magnitude of categories - ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) - new_labels = range(129, -1, -1) - expected = pd.MultiIndex.from_tuples( - [(0, i) for i in new_labels]) - - # [w/o mutation] - result = ind.set_labels(labels=new_labels, level=1) - assert result.equals(expected) - - # [w/ mutation] - result = ind.copy() - result.set_labels(labels=new_labels, level=1, inplace=True) - assert result.equals(expected) - - def test_set_levels_labels_names_bad_input(self): - levels, labels = self.index.levels, self.index.labels - names = self.index.names - - with tm.assert_raises_regex(ValueError, 'Length of levels'): - self.index.set_levels([levels[0]]) - - with tm.assert_raises_regex(ValueError, 'Length of labels'): - self.index.set_labels([labels[0]]) - - with tm.assert_raises_regex(ValueError, 'Length of names'): - self.index.set_names([names[0]]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_labels(labels[0]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_names(names[0]) - - # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_levels(levels, level=0) - - # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): - 
self.index.set_labels(labels[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'list-like'): - self.index.set_labels(labels, level=0) - - # should have equal lengths - with tm.assert_raises_regex(ValueError, 'Length of names'): - self.index.set_names(names[0], level=[0, 1]) - - with tm.assert_raises_regex(TypeError, 'string'): - self.index.set_names(names, level=0) - - def test_set_levels_categorical(self): - # GH13854 - index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) - for ordered in [False, True]: - cidx = CategoricalIndex(list("bac"), ordered=ordered) - result = index.set_levels(cidx, 0) - expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], - labels=index.labels) - tm.assert_index_equal(result, expected) - - result_lvl = result.get_level_values(0) - expected_lvl = CategoricalIndex(list("bacb"), - categories=cidx.categories, - ordered=cidx.ordered) - tm.assert_index_equal(result_lvl, expected_lvl) - - def test_metadata_immutable(self): - levels, labels = self.index.levels, self.index.labels - # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile('does not support mutable operations') - with tm.assert_raises_regex(TypeError, mutable_regex): - levels[0] = levels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): - levels[0][0] = levels[0][0] - # ditto for labels - with tm.assert_raises_regex(TypeError, mutable_regex): - labels[0] = labels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): - labels[0][0] = labels[0][0] - # and for names - names = self.index.names - with tm.assert_raises_regex(TypeError, mutable_regex): - names[0] = names[0] - - def test_inplace_mutation_resets_values(self): - levels = [['a', 'b', 'c'], [4]] - levels2 = [[1, 2, 3], ['a']] - labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] - - mi1 = MultiIndex(levels=levels, labels=labels) - mi2 = MultiIndex(levels=levels2, labels=labels) - vals = mi1.values.copy() - vals2 = mi2.values.copy() - - assert mi1._tuples is 
not None - - # Make sure level setting works - new_vals = mi1.set_levels(levels2).values - tm.assert_almost_equal(vals2, new_vals) - - # Non-inplace doesn't kill _tuples [implementation detail] - tm.assert_almost_equal(mi1._tuples, vals) - - # ...and values is still same too - tm.assert_almost_equal(mi1.values, vals) - - # Inplace should kill _tuples - mi1.set_levels(levels2, inplace=True) - tm.assert_almost_equal(mi1.values, vals2) - - # Make sure label setting works too - labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.empty((6,), dtype=object) - exp_values[:] = [(long(1), 'a')] * 6 - - # Must be 1d array of tuples - assert exp_values.shape == (6,) - new_values = mi2.set_labels(labels2).values - - # Not inplace shouldn't change - tm.assert_almost_equal(mi2._tuples, vals2) - - # Should have correct values - tm.assert_almost_equal(exp_values, new_values) - - # ...and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) - tm.assert_almost_equal(mi2.values, new_values) - - def test_copy_in_constructor(self): - levels = np.array(["a", "b", "c"]) - labels = np.array([1, 1, 2, 0, 0, 1, 1]) - val = labels[0] - mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], - copy=True) - assert mi.labels[0][0] == val - labels[0] = 15 - assert mi.labels[0][0] == val - val = levels[0] - levels[0] = "PANDA" - assert mi.levels[0][0] == val - - def test_set_value_keeps_names(self): - # motivating example from #3742 - lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] - lev2 = ['1', '2', '3'] * 2 - idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) - df = pd.DataFrame( - np.random.randn(6, 4), - columns=['one', 'two', 'three', 'four'], - index=idx) - df = df.sort_index() - assert df._is_copy is None - assert df.index.names == ('Name', 'Number') - df.at[('grethe', '4'), 'one'] = 99.34 - assert df._is_copy is None - assert df.index.names == ('Name', 'Number') - - def test_copy_names(self): - 
# Check that adding a "names" parameter to the copy is honored - # GH14302 - multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2']) - multi_idx1 = multi_idx.copy() - - assert multi_idx.equals(multi_idx1) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx1.names == ['MyName1', 'MyName2'] - - multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2']) - - assert multi_idx.equals(multi_idx2) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx2.names == ['NewName1', 'NewName2'] - - multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2']) - - assert multi_idx.equals(multi_idx3) - assert multi_idx.names == ['MyName1', 'MyName2'] - assert multi_idx3.names == ['NewName1', 'NewName2'] - - def test_names(self): - - # names are assigned in setup - names = self.index_names - level_names = [level.name for level in self.index.levels] - assert names == level_names - - # setting bad names on existing - index = self.index - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", - list(index.names) + ["third"]) - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", []) - - # initializing with bad names (should always be equivalent) - major_axis, minor_axis = self.index.levels - major_labels, minor_labels = self.index.labels - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) - - # names are assigned - index.names = ["a", "b"] - ind_names = list(index.names) - level_names = [level.name for level in index.levels] - assert ind_names == level_names - - def test_astype(self): - expected = self.index.copy() - actual = self.index.astype('O') - assert_copy(actual.levels, expected.levels) - 
assert_copy(actual.labels, expected.labels) - self.check_level_names(actual, expected.names) - - with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): - self.index.astype(np.dtype(int)) - - @pytest.mark.parametrize('ordered', [True, False]) - def test_astype_category(self, ordered): - # GH 18630 - msg = '> 1 ndim Categorical are not supported at this time' - with tm.assert_raises_regex(NotImplementedError, msg): - self.index.astype(CategoricalDtype(ordered=ordered)) - - if ordered is False: - # dtype='category' defaults to ordered=False, so only test once - with tm.assert_raises_regex(NotImplementedError, msg): - self.index.astype('category') - - def test_constructor_single_level(self): - result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(result, MultiIndex) - expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') - tm.assert_index_equal(result.levels[0], expected) - assert result.names == ['first'] - - def test_constructor_no_levels(self): - tm.assert_raises_regex(ValueError, "non-zero number " - "of levels/labels", - MultiIndex, levels=[], labels=[]) - both_re = re.compile('Must pass both levels and labels') - with tm.assert_raises_regex(TypeError, both_re): - MultiIndex(levels=[]) - with tm.assert_raises_regex(TypeError, both_re): - MultiIndex(labels=[]) - - def test_constructor_mismatched_label_levels(self): - labels = [np.array([1]), np.array([2]), np.array([3])] - levels = ["a"] - tm.assert_raises_regex(ValueError, "Length of levels and labels " - "must be the same", MultiIndex, - levels=levels, labels=labels) - length_error = re.compile('>= length of level') - label_error = re.compile(r'Unequal label lengths: \[4, 2\]') - - # important to check that it's looking at the right thing. 
- with tm.assert_raises_regex(ValueError, length_error): - MultiIndex(levels=[['a'], ['b']], - labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - - with tm.assert_raises_regex(ValueError, label_error): - MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) - - # external API - with tm.assert_raises_regex(ValueError, length_error): - self.index.copy().set_levels([['a'], ['b']]) - - with tm.assert_raises_regex(ValueError, label_error): - self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - - def test_constructor_nonhashable_names(self): - # GH 20527 - levels = [[1, 2], [u'one', u'two']] - labels = [[0, 0, 1, 1], [0, 1, 0, 1]] - names = ((['foo'], ['bar'])) - message = "MultiIndex.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, - MultiIndex, levels=levels, - labels=labels, names=names) - - # With .rename() - mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=('foo', 'bar')) - renamed = [['foor'], ['barr']] - tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) - # With .set_names() - tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) - - @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], - [1, 'a', 1]]) - def test_duplicate_level_names(self, names): - # GH18872, GH19029 - mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names) - assert mi.names == names - - # With .rename() - mi = pd.MultiIndex.from_product([[0, 1]] * 3) - mi = mi.rename(names) - assert mi.names == names - - # With .rename(., level=) - mi.rename(names[1], level=1, inplace=True) - mi = mi.rename([names[0], names[2]], level=[0, 2]) - assert mi.names == names - - def test_duplicate_level_names_access_raises(self): - self.index.names = ['foo', 'foo'] - tm.assert_raises_regex(KeyError, 'Level foo not found', - self.index._get_level_number, 'foo') - - def assert_multiindex_copied(self, copy, original): - # Levels should be (at least, shallow copied) - tm.assert_copy(copy.levels, 
original.levels) - tm.assert_almost_equal(copy.labels, original.labels) - - # Labels doesn't matter which way copied - tm.assert_almost_equal(copy.labels, original.labels) - assert copy.labels is not original.labels - - # Names doesn't matter which way copied - assert copy.names == original.names - assert copy.names is not original.names - - # Sort order should be copied - assert copy.sortorder == original.sortorder - - def test_copy(self): - i_copy = self.index.copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_shallow_copy(self): - i_copy = self.index._shallow_copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_view(self): - i_view = self.index.view() - - self.assert_multiindex_copied(i_view, self.index) - - def check_level_names(self, index, names): - assert [level.name for level in index.levels] == list(names) - - def test_changing_names(self): - - # names should be applied to levels - level_names = [level.name for level in self.index.levels] - self.check_level_names(self.index, self.index.names) - - view = self.index.view() - copy = self.index.copy() - shallow_copy = self.index._shallow_copy() - - # changing names should change level names on object - new_names = [name + "a" for name in self.index.names] - self.index.names = new_names - self.check_level_names(self.index, new_names) - - # but not on copies - self.check_level_names(view, level_names) - self.check_level_names(copy, level_names) - self.check_level_names(shallow_copy, level_names) - - # and copies shouldn't change original - shallow_copy.names = [name + "c" for name in shallow_copy.names] - self.check_level_names(self.index, new_names) - - def test_get_level_number_integer(self): - self.index.names = [1, 0] - assert self.index._get_level_number(1) == 0 - assert self.index._get_level_number(0) == 1 - pytest.raises(IndexError, self.index._get_level_number, 2) - tm.assert_raises_regex(KeyError, 'Level fourth not found', - self.index._get_level_number, 
'fourth') - - def test_from_arrays(self): - arrays = [] - for lev, lab in zip(self.index.levels, self.index.labels): - arrays.append(np.asarray(lev).take(lab)) - - # list of arrays as input - result = MultiIndex.from_arrays(arrays, names=self.index.names) - tm.assert_index_equal(result, self.index) - - # infer correctly - result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], - ['a', 'b']]) - assert result.levels[0].equals(Index([Timestamp('20130101')])) - assert result.levels[1].equals(Index(['a', 'b'])) - - def test_from_arrays_iterator(self): - # GH 18434 - arrays = [] - for lev, lab in zip(self.index.levels, self.index.labels): - arrays.append(np.asarray(lev).take(lab)) - - # iterator as input - result = MultiIndex.from_arrays(iter(arrays), names=self.index.names) - tm.assert_index_equal(result, self.index) - - # invalid iterator input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of array-likes."): - MultiIndex.from_arrays(0) - - def test_from_arrays_index_series_datetimetz(self): - idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3, - tz='Asia/Tokyo') - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_timedelta(self): - idx1 = pd.timedelta_range('1 days', freq='D', periods=3) - idx2 = pd.timedelta_range('2 hours', freq='H', periods=3) - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = 
pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_period(self): - idx1 = pd.period_range('2011-01-01', freq='D', periods=3) - idx2 = pd.period_range('2015-01-01', freq='H', periods=3) - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_datetimelike_mixed(self): - idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern') - idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3) - idx3 = pd.timedelta_range('1 days', freq='D', periods=3) - idx4 = pd.period_range('2011-01-01', freq='D', periods=3) - - result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - tm.assert_index_equal(result.get_level_values(2), idx3) - tm.assert_index_equal(result.get_level_values(3), idx4) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), - pd.Series(idx2), - pd.Series(idx3), - pd.Series(idx4)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - tm.assert_index_equal(result2.get_level_values(2), idx3) - tm.assert_index_equal(result2.get_level_values(3), idx4) - - tm.assert_index_equal(result, result2) - - def test_from_arrays_index_series_categorical(self): - # GH13743 - idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=False) - idx2 = 
pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=True) - - result = pd.MultiIndex.from_arrays([idx1, idx2]) - tm.assert_index_equal(result.get_level_values(0), idx1) - tm.assert_index_equal(result.get_level_values(1), idx2) - - result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) - tm.assert_index_equal(result2.get_level_values(0), idx1) - tm.assert_index_equal(result2.get_level_values(1), idx2) - - result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) - tm.assert_index_equal(result3.get_level_values(0), idx1) - tm.assert_index_equal(result3.get_level_values(1), idx2) - - def test_from_arrays_empty(self): - # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): - MultiIndex.from_arrays(arrays=[]) - - # 1 level - result = MultiIndex.from_arrays(arrays=[[]], names=['A']) - assert isinstance(result, MultiIndex) - expected = Index([], name='A') - tm.assert_index_equal(result.levels[0], expected) - - # N levels - for N in [2, 3]: - arrays = [[]] * N - names = list('ABC')[:N] - result = MultiIndex.from_arrays(arrays=arrays, names=names) - expected = MultiIndex(levels=[[]] * N, labels=[[]] * N, - names=names) - tm.assert_index_equal(result, expected) - - def test_from_arrays_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - for i in invalid_inputs: - pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i) - - def test_from_arrays_different_lengths(self): - # see gh-13599 - idx1 = [1, 2, 3] - idx2 = ['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [] - idx2 = ['a', 'b'] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) - - idx1 = [1, 2, 3] - idx2 = [] - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, 
idx2]) - - def test_from_product(self): - - first = ['foo', 'bar', 'buz'] - second = ['a', 'b', 'c'] - names = ['first', 'second'] - result = MultiIndex.from_product([first, second], names=names) - - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), - ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), - ('buz', 'c')] - expected = MultiIndex.from_tuples(tuples, names=names) - - tm.assert_index_equal(result, expected) - - def test_from_product_iterator(self): - # GH 18434 - first = ['foo', 'bar', 'buz'] - second = ['a', 'b', 'c'] - names = ['first', 'second'] - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), - ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), - ('buz', 'c')] - expected = MultiIndex.from_tuples(tuples, names=names) - - # iterator as input - result = MultiIndex.from_product(iter([first, second]), names=names) - tm.assert_index_equal(result, expected) - - # Invalid non-iterable input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of iterables."): - MultiIndex.from_product(0) - - def test_from_product_empty(self): - # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): - MultiIndex.from_product([]) - - # 1 level - result = MultiIndex.from_product([[]], names=['A']) - expected = pd.Index([], name='A') - tm.assert_index_equal(result.levels[0], expected) - - # 2 levels - l1 = [[], ['foo', 'bar', 'baz'], []] - l2 = [[], [], ['a', 'b', 'c']] - names = ['A', 'B'] - for first, second in zip(l1, l2): - result = MultiIndex.from_product([first, second], names=names) - expected = MultiIndex(levels=[first, second], - labels=[[], []], names=names) - tm.assert_index_equal(result, expected) - - # GH12258 - names = ['A', 'B', 'C'] - for N in range(4): - lvl2 = lrange(N) - result = MultiIndex.from_product([[], lvl2, []], names=names) - expected = MultiIndex(levels=[[], lvl2, []], - labels=[[], [], []], names=names) - tm.assert_index_equal(result, 
expected) - - def test_from_product_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] - for i in invalid_inputs: - pytest.raises(TypeError, MultiIndex.from_product, iterables=i) - - def test_from_product_datetimeindex(self): - dt_index = date_range('2000-01-01', periods=2) - mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp( - '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( - '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) - tm.assert_numpy_array_equal(mi.values, etalon) - - def test_from_product_index_series_categorical(self): - # GH13743 - first = ['foo', 'bar'] - for ordered in [False, True]: - idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), - ordered=ordered) - expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"), - categories=list("bac"), - ordered=ordered) - - for arr in [idx, pd.Series(idx), idx.values]: - result = pd.MultiIndex.from_product([first, arr]) - tm.assert_index_equal(result.get_level_values(1), expected) - - def test_values_boxed(self): - tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), - (3, pd.Timestamp('2000-01-03')), - (1, pd.Timestamp('2000-01-04')), - (2, pd.Timestamp('2000-01-02')), - (3, pd.Timestamp('2000-01-03'))] - result = pd.MultiIndex.from_tuples(tuples) - expected = construct_1d_object_array_from_listlike(tuples) - tm.assert_numpy_array_equal(result.values, expected) - # Check that code branches for boxed values produce identical results - tm.assert_numpy_array_equal(result.values[:4], result[:4].values) - - def test_values_multiindex_datetimeindex(self): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(10 ** 18, 10 ** 18 + 5) - naive = pd.DatetimeIndex(ints) - aware = pd.DatetimeIndex(ints, tz='US/Central') - - idx = pd.MultiIndex.from_arrays([naive, aware]) - result = idx.values - - outer = pd.DatetimeIndex([x[0] for 
x in result]) - tm.assert_index_equal(outer, naive) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive[:2]) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware[:2]) - - def test_values_multiindex_periodindex(self): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(2007, 2012) - pidx = pd.PeriodIndex(ints, freq='D') - - idx = pd.MultiIndex.from_arrays([ints, pidx]) - result = idx.values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints)) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx[:2]) - - def test_append(self): - result = self.index[:3].append(self.index[3:]) - assert result.equals(self.index) - - foos = [self.index[:1], self.index[1:3], self.index[3:]] - result = foos[0].append(foos[1:]) - assert result.equals(self.index) - - # empty - result = self.index.append([]) - assert result.equals(self.index) - - def test_append_mixed_dtypes(self): - # GH 13660 - dti = date_range('2011-01-01', freq='M', periods=3, ) - dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern') - pi = period_range('2011-01', freq='M', periods=3) - - mi = MultiIndex.from_arrays([[1, 2, 3], - [1.1, np.nan, 3.3], - ['a', 'b', 'c'], - dti, dti_tz, pi]) - assert mi.nlevels == 6 - - res = mi.append(mi) - exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3], - [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], - ['a', 'b', 'c', 'a', 'b', 'c'], - dti.append(dti), - dti_tz.append(dti_tz), - pi.append(pi)]) - tm.assert_index_equal(res, 
exp) - - other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z'], - ['x', 'y', 'z'], ['x', 'y', 'z']]) - - res = mi.append(other) - exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'], - [1.1, np.nan, 3.3, 'x', 'y', 'z'], - ['a', 'b', 'c', 'x', 'y', 'z'], - dti.append(pd.Index(['x', 'y', 'z'])), - dti_tz.append(pd.Index(['x', 'y', 'z'])), - pi.append(pd.Index(['x', 'y', 'z']))]) - tm.assert_index_equal(res, exp) - - def test_get_level_values(self): - result = self.index.get_level_values(0) - expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], - name='first') - tm.assert_index_equal(result, expected) - assert result.name == 'first' - - result = self.index.get_level_values('first') - expected = self.index.get_level_values(0) - tm.assert_index_equal(result, expected) - - # GH 10460 - index = MultiIndex( - levels=[CategoricalIndex(['A', 'B']), - CategoricalIndex([1, 2, 3])], - labels=[np.array([0, 0, 0, 1, 1, 1]), - np.array([0, 1, 2, 0, 1, 2])]) - - exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) - tm.assert_index_equal(index.get_level_values(0), exp) - exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) - tm.assert_index_equal(index.get_level_values(1), exp) - - def test_get_level_values_int_with_na(self): - # GH 17924 - arrays = [['a', 'b', 'b'], [1, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = Index([1, np.nan, 2]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = Index([np.nan, np.nan, 2]) - tm.assert_index_equal(result, expected) - - def test_get_level_values_na(self): - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan]) - tm.assert_index_equal(result, expected) - - result = 
index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = pd.DatetimeIndex([0, 1, pd.NaT]) - tm.assert_index_equal(result, expected) - - arrays = [[], []] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([], dtype=object) - tm.assert_index_equal(result, expected) - - def test_get_level_values_all_na(self): - # GH 17924 when level entirely consists of nan - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) - tm.assert_index_equal(result, expected) - - result = index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1], dtype=object) - tm.assert_index_equal(result, expected) - - def test_reorder_levels(self): - # this blows up - tm.assert_raises_regex(IndexError, '^Too many levels', - self.index.reorder_levels, [2, 1, 0]) - - def test_nlevels(self): - assert self.index.nlevels == 2 - - def test_iter(self): - result = list(self.index) - expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - assert result == expected - - def test_legacy_pickle(self, datapath): - if PY3: - pytest.skip("testing for legacy pickles not " - "support on py3") - - path = datapath('indexes', 'data', 'multiindex_v1.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - assert obj.equals(obj2) - - res = obj.get_indexer(obj) - exp = np.arange(len(obj), dtype=np.intp) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def 
test_legacy_v2_unpickle(self, datapath): - - # 0.7.3 -> 0.8.0 format manage - path = datapath('indexes', 'data', 'mindex_073.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - assert obj.equals(obj2) - - res = obj.get_indexer(obj) - exp = np.arange(len(obj), dtype=np.intp) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = MultiIndex.from_product( - [[1, 2], ['a', 'b'], date_range('20130101', periods=3, - tz='US/Eastern') - ], names=['one', 'two', 'three']) - unpickled = tm.round_trip_pickle(index) - assert index.equal_levels(unpickled) - - def test_from_tuples_index_values(self): - result = MultiIndex.from_tuples(self.index) - assert (result.values == self.index.values).all() - - def test_contains(self): - assert ('foo', 'two') in self.index - assert ('bar', 'two') not in self.index - assert None not in self.index - - def test_contains_top_level(self): - midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) - assert 'A' in midx - assert 'A' not in midx._engine - - def test_contains_with_nat(self): - # MI with a NaT - mi = MultiIndex(levels=[['C'], - pd.date_range('2012-01-01', periods=5)], - labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], - names=[None, 'B']) - assert ('C', pd.Timestamp('2012-01-01')) in mi - for val in mi.values: - assert val in mi - - def test_is_all_dates(self): - assert not self.index.is_all_dates - - def test_is_numeric(self): - # MultiIndex is never numeric - assert not self.index.is_numeric() - - def test_getitem(self): - # scalar - assert self.index[2] == ('bar', 'one') - - # slice - result = self.index[2:5] - expected = self.index[[2, 3, 4]] - assert result.equals(expected) - - # boolean - result = self.index[[True, False, True, False, True, True]] - 
result2 = self.index[np.array([True, False, True, False, True, True])] - expected = self.index[[0, 2, 4, 5]] - assert result.equals(expected) - assert result2.equals(expected) - - def test_getitem_group_select(self): - sorted_idx, _ = self.index.sortlevel(0) - assert sorted_idx.get_loc('baz') == slice(3, 4) - assert sorted_idx.get_loc('foo') == slice(0, 2) - - def test_get_loc(self): - assert self.index.get_loc(('foo', 'two')) == 1 - assert self.index.get_loc(('baz', 'two')) == 3 - pytest.raises(KeyError, self.index.get_loc, ('bar', 'two')) - pytest.raises(KeyError, self.index.get_loc, 'quux') - - pytest.raises(NotImplementedError, self.index.get_loc, 'foo', - method='nearest') - - # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - pytest.raises(KeyError, index.get_loc, (1, 1)) - assert index.get_loc((2, 0)) == slice(3, 5) - - def test_get_loc_duplicates(self): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - assert result == expected - # pytest.raises(Exception, index.get_loc, 2) - - index = Index(['c', 'a', 'a', 'b', 'b']) - rs = index.get_loc('c') - xp = 0 - assert rs == xp - - def test_get_value_duplicates(self): - index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - - assert index.get_loc('D') == slice(0, 3) - with pytest.raises(KeyError): - index._engine.get_value(np.array([]), 'D') - - def test_get_loc_level(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = 
index[expected].droplevel(0).droplevel(0) - assert loc == expected - assert new_index.equals(exp_index) - - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - assert loc == expected - assert new_index is None - - pytest.raises(KeyError, index.get_loc_level, (2, 2)) - - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( - [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) - result, new_index = index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - assert result == expected - assert new_index.equals(index.droplevel(0)) - - @pytest.mark.parametrize('level', [0, 1]) - @pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) - def test_get_loc_nan(self, level, null_val): - # GH 18485 : NaN in MultiIndex - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - levels[level] = np.array([0, null_val], dtype=type(null_val)) - key[level] = null_val - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - def test_get_loc_missing_nan(self): - # GH 8569 - idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) - assert isinstance(idx.get_loc(1), slice) - pytest.raises(KeyError, idx.get_loc, 3) - pytest.raises(KeyError, idx.get_loc, np.nan) - pytest.raises(KeyError, idx.get_loc, [np.nan]) - - @pytest.mark.parametrize('dtype1', [int, float, bool, str]) - @pytest.mark.parametrize('dtype2', [int, float, bool, str]) - def test_get_loc_multiple_dtypes(self, dtype1, dtype2): - # GH 18520 - levels = [np.array([0, 1]).astype(dtype1), - np.array([0, 1]).astype(dtype2)] - idx = pd.MultiIndex.from_product(levels) - assert idx.get_loc(idx[2]) == 2 - - @pytest.mark.parametrize('level', [0, 1]) - @pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) - def test_get_loc_implicit_cast(self, level, dtypes): - # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - lev_dtype, key_dtype = dtypes - levels[level] = np.array([0, 1], dtype=lev_dtype) 
- key[level] = key_dtype(1) - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - def test_get_loc_cast_bool(self): - # GH 19086 : int is casted to bool, but not vice-versa - levels = [[False, True], np.arange(2, dtype='int64')] - idx = MultiIndex.from_product(levels) - - assert idx.get_loc((0, 1)) == 1 - assert idx.get_loc((1, 0)) == 2 - - pytest.raises(KeyError, idx.get_loc, (False, True)) - pytest.raises(KeyError, idx.get_loc, (True, False)) - - def test_slice_locs(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - - slob = slice(*idx.slice_locs(df.index[5], df.index[15])) - sliced = stacked[slob] - expected = df[5:16].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30), - df.index[15] - timedelta(seconds=30))) - sliced = stacked[slob] - expected = df[6:15].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - def test_slice_locs_with_type_mismatch(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, (1, 3)) - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, - df.index[5] + timedelta( - seconds=30), (5, 2)) - df = tm.makeCustomDataframe(5, 5) - stacked = df.stack() - idx = stacked.index - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): - idx.slice_locs(timedelta(seconds=30)) - # TODO: Try creating a UnicodeDecodeError in exception message - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): - idx.slice_locs(df.index[1], (16, "a")) - - def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " - 
"MultiIndex lexsort depth", - index.slice_locs, (1, 0, 1), (2, 1, 0)) - - # works - sorted_index, _ = index.sortlevel(0) - # should there be a test case here??? - sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) - - def test_slice_locs_partial(self): - sorted_idx, _ = self.index.sortlevel(0) - - result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) - assert result == (1, 5) - - result = sorted_idx.slice_locs(None, ('qux', 'one')) - assert result == (0, 5) - - result = sorted_idx.slice_locs(('foo', 'two'), None) - assert result == (1, len(sorted_idx)) - - result = sorted_idx.slice_locs('bar', 'baz') - assert result == (2, 4) - - def test_slice_locs_not_contained(self): - # some searchsorted action - - index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], - labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], - [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) - - result = index.slice_locs((1, 0), (5, 2)) - assert result == (3, 6) - - result = index.slice_locs(1, 5) - assert result == (3, 6) - - result = index.slice_locs((2, 2), (5, 2)) - assert result == (3, 6) - - result = index.slice_locs(2, 5) - assert result == (3, 6) - - result = index.slice_locs((1, 0), (6, 3)) - assert result == (3, 8) - - result = index.slice_locs(-1, 10) - assert result == (0, len(index)) - - def test_consistency(self): - # need to construct an overflow - major_axis = lrange(70000) - minor_axis = lrange(10) - - major_labels = np.arange(70000) - minor_labels = np.repeat(lrange(10), 7000) - - # the fact that is works means it's consistent - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - # inconsistent - major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - assert not index.is_unique - - def test_truncate(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 
3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - result = index.truncate(before=1) - assert 'foo' not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(after=1) - assert 2 not in result.levels[0] - assert 1 in result.levels[0] - - result = index.truncate(before=1, after=2) - assert len(result.levels[0]) == 2 - - # after < before - pytest.raises(ValueError, index.truncate, 3, 1) - - def test_get_indexer(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) - - r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) - - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2.values) - rexp1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, rexp1) - - r1 = idx1.get_indexer([1, 2, 3]) - assert (r1 == [-1, -1, -1]).all() - - # create index with duplicates - idx1 = Index(lrange(10) + lrange(10)) - idx2 = Index(lrange(20)) - - msg = "Reindexing only valid with uniquely valued Index objects" - with 
tm.assert_raises_regex(InvalidIndexError, msg): - idx1.get_indexer(idx2) - - def test_get_indexer_nearest(self): - midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='nearest') - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='pad', tolerance=2) - - def test_get_indexer_categorical_time(self): - # https://github.com/pandas-dev/pandas/issues/21390 - midx = MultiIndex.from_product( - [Categorical(['a', 'b', 'c']), - Categorical(date_range("2012-01-01", periods=3, freq='H'))]) - result = midx.get_indexer(midx) - tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) - - def test_hash_collisions(self): - # non-smoke test that we don't get hash collisions - - index = MultiIndex.from_product([np.arange(1000), np.arange(1000)], - names=['one', 'two']) - result = index.get_indexer(index.values) - tm.assert_numpy_array_equal(result, np.arange( - len(index), dtype='intp')) - - for i in [0, 1, len(index) - 2, len(index) - 1]: - result = index.get_loc(index[i]) - assert result == i - - def test_format(self): - self.index.format() - self.index[:0].format() - - def test_format_integer_names(self): - index = MultiIndex(levels=[[0, 1], [0, 1]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) - index.format(names=True) - - def test_format_sparse_display(self): - index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], - labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], - [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) - - result = index.format() - assert result[3] == '1 0 0 0' - - def test_format_sparse_config(self): - warn_filters = warnings.filters - warnings.filterwarnings('ignore', category=FutureWarning, - module=".*format") - # GH1538 - pd.set_option('display.multi_sparse', False) - - result = self.index.format() - assert result[1] == 'foo two' - - tm.reset_display_options() - - warnings.filters = warn_filters - - def test_to_frame(self): - tuples = [(1, 'one'), (1, 
'two'), (2, 'one'), (2, 'two')] - - index = MultiIndex.from_tuples(tuples) - result = index.to_frame(index=False) - expected = DataFrame(tuples) - tm.assert_frame_equal(result, expected) - - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')] - index = MultiIndex.from_tuples(tuples, names=['first', 'second']) - result = index.to_frame(index=False) - expected = DataFrame(tuples) - expected.columns = ['first', 'second'] - tm.assert_frame_equal(result, expected) - - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) - result = index.to_frame(index=False) - expected = DataFrame( - {0: np.repeat(np.arange(5, dtype='int64'), 3), - 1: np.tile(pd.date_range('20130101', periods=3), 5)}) - tm.assert_frame_equal(result, expected) - - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) - result = index.to_frame() - expected.index = index - tm.assert_frame_equal(result, expected) - - def test_to_hierarchical(self): - # GH21613 - index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two')]) - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(3) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - # K > 1 - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(3, 2) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - # non-sorted - index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), - (2, 'a'), 
(2, 'b')], - names=['N1', 'N2']) - with tm.assert_produces_warning(FutureWarning): - result = index.to_hierarchical(2) - expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), - (1, 'b'), - (2, 'a'), (2, 'a'), - (2, 'b'), (2, 'b')], - names=['N1', 'N2']) - tm.assert_index_equal(result, expected) - assert result.names == index.names - - def test_bounds(self): - self.index._bounds - - def test_equals_multi(self): - assert self.index.equals(self.index) - assert not self.index.equals(self.index.values) - assert self.index.equals(Index(self.index.values)) - - assert self.index.equal_levels(self.index) - assert not self.index.equals(self.index[:-1]) - assert not self.index.equals(self.index[-1]) - - # different number of levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - assert not index.equals(index2) - assert not index.equal_levels(index2) - - # levels are different - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not self.index.equals(index) - assert not self.index.equal_levels(index) - - # some of the labels are different - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not self.index.equals(index) - - def test_equals_missing_values(self): - # make sure take is not using -1 - i = pd.MultiIndex.from_tuples([(0, pd.NaT), - (0, pd.Timestamp('20130101'))]) - result = 
i[0:1].equals(i[0]) - assert not result - result = i[1:2].equals(i[1]) - assert not result - - def test_identical(self): - mi = self.index.copy() - mi2 = self.index.copy() - assert mi.identical(mi2) - - mi = mi.set_names(['new1', 'new2']) - assert mi.equals(mi2) - assert not mi.identical(mi2) - - mi2 = mi2.set_names(['new1', 'new2']) - assert mi.identical(mi2) - - mi3 = Index(mi.tolist(), names=mi.names) - mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) - assert mi.identical(mi3) - assert not mi.identical(mi4) - assert mi.equals(mi4) - - def test_is_(self): - mi = MultiIndex.from_tuples(lzip(range(10), range(10))) - assert mi.is_(mi) - assert mi.is_(mi.view()) - assert mi.is_(mi.view().view().view().view()) - mi2 = mi.view() - # names are metadata, they don't change id - mi2.names = ["A", "B"] - assert mi2.is_(mi) - assert mi.is_(mi2) - - assert mi.is_(mi.set_names(["C", "D"])) - mi2 = mi.view() - mi2.set_names(["E", "F"], inplace=True) - assert mi.is_(mi2) - # levels are inherent properties, they change identity - mi3 = mi2.set_levels([lrange(10), lrange(10)]) - assert not mi3.is_(mi2) - # shouldn't change - assert mi2.is_(mi) - mi4 = mi3.view() - - # GH 17464 - Remove duplicate MultiIndex levels - mi4.set_levels([lrange(10), lrange(10)], inplace=True) - assert not mi4.is_(mi3) - mi5 = mi.view() - mi5.set_levels(mi5.levels, inplace=True) - assert not mi5.is_(mi) - - def test_union(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_union = piece1 | piece2 - - tups = sorted(self.index.values) - expected = MultiIndex.from_tuples(tups) - - assert the_union.equals(expected) - - # corner case, pass self or empty thing: - the_union = self.index.union(self.index) - assert the_union is self.index - - the_union = self.index.union(self.index[:0]) - assert the_union is self.index - - # won't work in python 3 - # tuples = self.index.values - # result = self.index[:4] | tuples[4:] - # assert result.equals(tuples) - - # not valid for python 3 
- # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) - - # result = other.union(self.index) - # assert ('foo', 'one') in result - # assert 'B' in result - - # result2 = self.index.union(other) - # assert result.equals(result2) - - def test_intersection(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_int = piece1 & piece2 - tups = sorted(self.index[3:5].values) - expected = MultiIndex.from_tuples(tups) - assert the_int.equals(expected) - - # corner case, pass self - the_int = self.index.intersection(self.index) - assert the_int is self.index - - # empty intersection: disjoint - empty = self.index[:2] & self.index[2:] - expected = self.index[:0] - assert empty.equals(expected) - - # can't do in python 3 - # tuples = self.index.values - # result = self.index & tuples - # assert result.equals(tuples) - - def test_sub(self): - - first = self.index - - # - now raises (previously was set op difference) - with pytest.raises(TypeError): - first - self.index[-3:] - with pytest.raises(TypeError): - self.index[-3:] - first - with pytest.raises(TypeError): - self.index[-3:] - first.tolist() - with pytest.raises(TypeError): - first.tolist() - self.index[-3:] - - def test_difference(self): - - first = self.index - result = first.difference(self.index[-3:]) - expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), - sortorder=0, - names=self.index.names) - - assert isinstance(result, MultiIndex) - assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: reflexive - result = self.index.difference(self.index) - expected = self.index[:0] - assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: superset - result = self.index[-3:].difference(self.index) - expected = self.index[:0] - assert result.equals(expected) - assert result.names == self.index.names - - # empty difference: degenerate - result = self.index[:0].difference(self.index) - 
expected = self.index[:0] - assert result.equals(expected) - assert result.names == self.index.names - - # names not the same - chunklet = self.index[-3:] - chunklet.names = ['foo', 'baz'] - result = first.difference(chunklet) - assert result.names == (None, None) - - # empty, but non-equal - result = self.index.difference(self.index.sortlevel(1)[0]) - assert len(result) == 0 - - # raise Exception called with non-MultiIndex - result = first.difference(first.values) - assert result.equals(first[:0]) - - # name from empty array - result = first.difference([]) - assert first.equals(result) - assert first.names == result.names - - # name from non-empty array - result = first.difference([('foo', 'one')]) - expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( - 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) - expected.names = first.names - assert first.names == result.names - tm.assert_raises_regex(TypeError, "other must be a MultiIndex " - "or a list of tuples", - first.difference, [1, 2, 3, 4, 5]) - - def test_from_tuples(self): - tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' - 'from empty list', - MultiIndex.from_tuples, []) - - expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], - names=['a', 'b']) - - # input tuples - result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - tm.assert_index_equal(result, expected) - - def test_from_tuples_iterator(self): - # GH 18434 - # input iterator for tuples - expected = MultiIndex(levels=[[1, 3], [2, 4]], - labels=[[0, 1], [0, 1]], - names=['a', 'b']) - - result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b']) - tm.assert_index_equal(result, expected) - - # input non-iterables - with tm.assert_raises_regex( - TypeError, 'Input must be a list / sequence of tuple-likes.'): - MultiIndex.from_tuples(0) - - def test_from_tuples_empty(self): - # GH 16777 - result = MultiIndex.from_tuples([], names=['a', 'b']) - expected = 
MultiIndex.from_arrays(arrays=[[], []], - names=['a', 'b']) - tm.assert_index_equal(result, expected) - - def test_argsort(self): - result = self.index.argsort() - expected = self.index.values.argsort() - tm.assert_numpy_array_equal(result, expected) - - def test_sortlevel(self): - import random - - tuples = list(self.index) - random.shuffle(tuples) - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - def test_sortlevel_not_sort_remaining(self): - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) - assert sorted_idx.equals(mi) - - def test_sortlevel_deterministic(self): - tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), - ('foo', 'one'), ('baz', 'two'), ('qux', 'one')] - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - assert sorted_idx.equals(expected) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - assert sorted_idx.equals(expected[::-1]) - - def test_dims(self): - pass - - def test_drop(self): - dropped = self.index.drop([('foo', 'two'), ('qux', 'one')]) - - index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) - dropped2 = 
self.index.drop(index) - - expected = self.index[[0, 2, 3, 5]] - tm.assert_index_equal(dropped, expected) - tm.assert_index_equal(dropped2, expected) - - dropped = self.index.drop(['bar']) - expected = self.index[[0, 1, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop('foo') - expected = self.index[[2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - index = MultiIndex.from_tuples([('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, [('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, index) - pytest.raises(KeyError, self.index.drop, ['foo', 'two']) - - # partially correct argument - mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - pytest.raises(KeyError, self.index.drop, mixed_index) - - # error='ignore' - dropped = self.index.drop(index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - dropped = self.index.drop(['foo', 'two'], errors='ignore') - expected = self.index[[2, 3, 4, 5]] - tm.assert_index_equal(dropped, expected) - - # mixed partial / full drop - dropped = self.index.drop(['foo', ('qux', 'one')]) - expected = self.index[[2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - # mixed partial / full drop / error='ignore' - mixed_index = ['foo', ('qux', 'one'), 'two'] - pytest.raises(KeyError, self.index.drop, mixed_index) - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[2, 3, 5]] - tm.assert_index_equal(dropped, expected) - - def test_droplevel_with_names(self): - index = self.index[self.index.get_loc('foo')] - dropped = index.droplevel(0) - assert dropped.name == 'second' - - index = MultiIndex( - levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 
1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - dropped = index.droplevel(0) - assert dropped.names == ('two', 'three') - - dropped = index.droplevel('two') - expected = index.droplevel(1) - assert dropped.equals(expected) - - def test_droplevel_list(self): - index = MultiIndex( - levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - - dropped = index[:2].droplevel(['three', 'one']) - expected = index[:2].droplevel(2).droplevel(0) - assert dropped.equals(expected) - - dropped = index[:2].droplevel([]) - expected = index[:2] - assert dropped.equals(expected) - - with pytest.raises(ValueError): - index[:2].droplevel(['one', 'two', 'three']) - - with pytest.raises(KeyError): - index[:2].droplevel(['one', 'four']) - - def test_drop_not_lexsorted(self): - # GH 12078 - - # define the lexsorted version of the multi-index - tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')] - lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c']) - assert lexsorted_mi.is_lexsorted() - - # and the not-lexsorted version - df = pd.DataFrame(columns=['a', 'b', 'c', 'd'], - data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]]) - df = df.pivot_table(index='a', columns=['b', 'c'], values='d') - df = df.reset_index() - not_lexsorted_mi = df.columns - assert not not_lexsorted_mi.is_lexsorted() - - # compare the results - tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) - with tm.assert_produces_warning(PerformanceWarning): - tm.assert_index_equal(lexsorted_mi.drop('a'), - not_lexsorted_mi.drop('a')) - - def test_insert(self): - # key contained in all levels - new_index = self.index.insert(0, ('bar', 'two')) - assert new_index.equal_levels(self.index) - assert new_index[0] == ('bar', 'two') - - # key not contained in all levels - new_index = self.index.insert(0, ('abc', 'three')) - - exp0 = 
Index(list(self.index.levels[0]) + ['abc'], name='first') - tm.assert_index_equal(new_index.levels[0], exp0) - - exp1 = Index(list(self.index.levels[1]) + ['three'], name='second') - tm.assert_index_equal(new_index.levels[1], exp1) - assert new_index[0] == ('abc', 'three') - - # key wrong length - msg = "Item must have length equal to number of levels" - with tm.assert_raises_regex(ValueError, msg): - self.index.insert(0, ('foo2',)) - - left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], - columns=['1st', '2nd', '3rd']) - left.set_index(['1st', '2nd'], inplace=True) - ts = left['3rd'].copy(deep=True) - - left.loc[('b', 'x'), '3rd'] = 2 - left.loc[('b', 'a'), '3rd'] = -1 - left.loc[('b', 'b'), '3rd'] = 3 - left.loc[('a', 'x'), '3rd'] = 4 - left.loc[('a', 'w'), '3rd'] = 5 - left.loc[('a', 'a'), '3rd'] = 6 - - ts.loc[('b', 'x')] = 2 - ts.loc['b', 'a'] = -1 - ts.loc[('b', 'b')] = 3 - ts.loc['a', 'x'] = 4 - ts.loc[('a', 'w')] = 5 - ts.loc['a', 'a'] = 6 - - right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2], - ['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4], - ['a', 'w', 5], ['a', 'a', 6]], - columns=['1st', '2nd', '3rd']) - right.set_index(['1st', '2nd'], inplace=True) - # FIXME data types changes to float because - # of intermediate nan insertion; - tm.assert_frame_equal(left, right, check_dtype=False) - tm.assert_series_equal(ts, right['3rd']) - - # GH9250 - idx = [('test1', i) for i in range(5)] + \ - [('test2', i) for i in range(6)] + \ - [('test', 17), ('test', 18)] - - left = pd.Series(np.linspace(0, 10, 11), - pd.MultiIndex.from_tuples(idx[:-2])) - - left.loc[('test', 17)] = 11 - left.loc[('test', 18)] = 12 - - right = pd.Series(np.linspace(0, 12, 13), - pd.MultiIndex.from_tuples(idx)) - - tm.assert_series_equal(left, right) - - def test_take_preserve_name(self): - taken = self.index.take([3, 0, 1]) - assert taken.names == self.index.names - - def test_take_fill_value(self): - # GH 12631 - vals = [['A', 'B'], - [pd.Timestamp('2011-01-01'), 
pd.Timestamp('2011-01-02')]] - idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) - - result = idx.take(np.array([1, 0, -1])) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - (np.nan, pd.NaT)] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - exp_vals = [('A', pd.Timestamp('2011-01-02')), - ('A', pd.Timestamp('2011-01-01')), - ('B', pd.Timestamp('2011-01-02'))] - expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt']) - tm.assert_index_equal(result, expected) - - msg = ('When allow_fill=True and fill_value is not None, ' - 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def take_invalid_kwargs(self): - vals = [['A', 'B'], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]] - idx = pd.MultiIndex.from_product(vals, names=['str', 'dt']) - indices = [1, 2] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') - - @pytest.mark.parametrize('other', - [Index(['three', 
'one', 'two']), - Index(['one']), - Index(['one', 'three'])]) - def test_join_level(self, other, join_type): - join_index, lidx, ridx = other.join(self.index, how=join_type, - level='second', - return_indexers=True) - - exp_level = other.join(self.index.levels[1], how=join_type) - assert join_index.levels[0].equals(self.index.levels[0]) - assert join_index.levels[1].equals(exp_level) - - # pare down levels - mask = np.array( - [x[1] in exp_level for x in self.index], dtype=bool) - exp_values = self.index.values[mask] - tm.assert_numpy_array_equal(join_index.values, exp_values) - - if join_type in ('outer', 'inner'): - join_index2, ridx2, lidx2 = \ - self.index.join(other, how=join_type, level='second', - return_indexers=True) - - assert join_index.equals(join_index2) - tm.assert_numpy_array_equal(lidx, lidx2) - tm.assert_numpy_array_equal(ridx, ridx2) - tm.assert_numpy_array_equal(join_index2.values, exp_values) - - def test_join_level_corner_case(self): - # some corner cases - idx = Index(['three', 'one', 'two']) - result = idx.join(self.index, level='second') - assert isinstance(result, MultiIndex) - - tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", - self.index.join, self.index, level=1) - - def test_join_self(self, join_type): - res = self.index - joined = res.join(res, how=join_type) - assert res is joined - - def test_join_multi(self): - # GH 10665 - midx = pd.MultiIndex.from_product( - [np.arange(4), np.arange(4)], names=['a', 'b']) - idx = pd.Index([1, 2, 5], name='b') - - # inner - jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) - exp_idx = pd.MultiIndex.from_product( - [np.arange(4), [1, 2]], names=['a', 'b']) - exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) - exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) - tm.assert_index_equal(jidx, exp_idx) - tm.assert_numpy_array_equal(lidx, exp_lidx) - tm.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, 
how='inner', return_indexers=True) - tm.assert_index_equal(jidx, exp_idx) - tm.assert_numpy_array_equal(lidx, exp_lidx) - tm.assert_numpy_array_equal(ridx, exp_ridx) - - # keep MultiIndex - jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) - exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, - 1, -1], dtype=np.intp) - tm.assert_index_equal(jidx, midx) - assert lidx is None - tm.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) - tm.assert_index_equal(jidx, midx) - assert lidx is None - tm.assert_numpy_array_equal(ridx, exp_ridx) - - def test_reindex(self): - result, indexer = self.index.reindex(list(self.index[:4])) - assert isinstance(result, MultiIndex) - self.check_level_names(result, self.index[:4].names) - - result, indexer = self.index.reindex(list(self.index)) - assert isinstance(result, MultiIndex) - assert indexer is None - self.check_level_names(result, self.index.names) - - def test_reindex_level(self): - idx = Index(['one']) - - target, indexer = self.index.reindex(idx, level='second') - target2, indexer2 = idx.reindex(self.index, level='second') - - exp_index = self.index.join(idx, level='second', how='right') - exp_index2 = self.index.join(idx, level='second', how='left') - - assert target.equals(exp_index) - exp_indexer = np.array([0, 2, 4]) - tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) - - assert target2.equals(exp_index2) - exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) - tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - - tm.assert_raises_regex(TypeError, "Fill method not supported", - self.index.reindex, self.index, - method='pad', level='second') - - tm.assert_raises_regex(TypeError, "Fill method not supported", - idx.reindex, idx, method='bfill', - level='first') - - def test_duplicates(self): - assert not self.index.has_duplicates - assert self.index.append(self.index).has_duplicates - - 
index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ - [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - assert index.has_duplicates - - # GH 9075 - t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), - (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), - (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), - (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), - (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), - (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), - (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), - (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), - (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), - (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), - (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), - (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), - (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), - (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), - (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), - (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), - (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), - (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] - - index = pd.MultiIndex.from_tuples(t) - assert not index.has_duplicates - - # handle int64 overflow if possible - def check(nlevels, with_nulls): - labels = np.tile(np.arange(500), 2) - level = np.arange(500) - - if with_nulls: # inject some null values - labels[500] = -1 # common nan value - labels = [labels.copy() for i in range(nlevels)] - for i in range(nlevels): - labels[i][500 + i - nlevels // 2] = -1 - - labels += [np.array([-1, 1]).repeat(500)] - else: - labels = [labels] * nlevels + [np.arange(2).repeat(500)] - - levels = [level] * nlevels + [[0, 1]] - - # no dups - index = MultiIndex(levels=levels, labels=labels) - assert not index.has_duplicates - - # with a dup - if 
with_nulls: - def f(a): - return np.insert(a, 1000, a[0]) - labels = list(map(f, labels)) - index = MultiIndex(levels=levels, labels=labels) - else: - values = index.values.tolist() - index = MultiIndex.from_tuples(values + [values[0]]) - - assert index.has_duplicates - - # no overflow - check(4, False) - check(4, True) - - # overflow possible - check(8, False) - check(8, True) - - # GH 9125 - n, k = 200, 5000 - levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - labels = [np.random.choice(n, k * n) for lev in levels] - mi = MultiIndex(levels=levels, labels=labels) - - for keep in ['first', 'last', False]: - left = mi.duplicated(keep=keep) - right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) - tm.assert_numpy_array_equal(left, right) - - # GH5873 - for a in [101, 102]: - mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - 2, dtype='bool')) - - for n in range(1, 6): # 1st level shape - for m in range(1, 5): # 2nd level shape - # all possible unique combinations, including nan - lab = product(range(-1, n), range(-1, m)) - mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], - labels=np.random.permutation(list(lab)).T) - assert len(mi) == (n + 1) * (m + 1) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - len(mi), dtype='bool')) - - def test_duplicate_meta_data(self): - # GH 10115 - index = MultiIndex( - levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) - - for idx in [index, - index.set_names([None, None]), - index.set_names([None, 'Num']), - 
index.set_names(['Upper', 'Num']), ]: - assert idx.has_duplicates - assert idx.drop_duplicates().names == idx.names - - def test_get_unique_index(self): - idx = self.index[[0, 1, 0, 1, 1, 0, 0]] - expected = self.index._shallow_copy(idx[[0, 1]]) - - for dropna in [False, True]: - result = idx._get_unique_index(dropna=dropna) - assert result.unique - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('names', [None, ['first', 'second']]) - def test_unique(self, names): - mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], - names=names) - - res = mi.unique() - exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], - names=names) - res = mi.unique() - exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], - names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], - names=names) - res = mi.unique() - exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) - tm.assert_index_equal(res, exp) - - # GH #20568 - empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=names) - res = mi.unique() - tm.assert_index_equal(mi, res) - - @pytest.mark.parametrize('level', [0, 'first', 1, 'second']) - def test_unique_level(self, level): - # GH #17896 - with level= argument - result = self.index.unique(level=level) - expected = self.index.get_level_values(level).unique() - tm.assert_index_equal(result, expected) - - # With already unique level - mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], - names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - tm.assert_index_equal(result, expected) - - # With empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - - def test_unique_datetimelike(self): - idx1 = 
pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', - '2015-01-01', 'NaT', 'NaT']) - idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', - '2015-01-02', 'NaT', '2015-01-01'], - tz='Asia/Tokyo') - result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() - - eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) - eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', - 'NaT', '2015-01-01'], - tz='Asia/Tokyo') - exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) - tm.assert_index_equal(result, exp) - - def test_tolist(self): - result = self.index.tolist() - exp = list(self.index.values) - assert result == exp - - def test_repr_with_unicode_data(self): - with pd.core.config.option_context("display.encoding", 'UTF-8'): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - index = pd.DataFrame(d).set_index(["a", "b"]).index - assert "\\u" not in repr(index) # we don't want unicode-escaped - - def test_repr_roundtrip(self): - - mi = MultiIndex.from_product([list('ab'), range(3)], - names=['first', 'second']) - str(mi) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - assert mi.get_level_values('first').inferred_type == 'string' - assert result.get_level_values('first').inferred_type == 'unicode' - - mi_u = MultiIndex.from_product( - [list(u'ab'), range(3)], names=['first', 'second']) - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - # formatting - if PY3: - str(mi) - else: - compat.text_type(mi) - - # long format - mi = MultiIndex.from_product([list('abcdefg'), range(10)], - names=['first', 'second']) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - assert mi.get_level_values('first').inferred_type == 'string' - assert 
result.get_level_values('first').inferred_type == 'unicode' - - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - def test_str(self): - # tested elsewhere - pass - - def test_unicode_string_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - str(idx) - else: - compat.text_type(idx) - - def test_bytestring_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - bytes(idx) - else: - str(idx) - - def test_slice_keep_name(self): - x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], - names=['x', 'y']) - assert x[1:].names == x.names - - def test_isna_behavior(self): - # should not segfault GH5123 - # NOTE: if MI representation changes, may make sense to allow - # isna(MI) - with pytest.raises(NotImplementedError): - pd.isna(self.index) - - def test_level_setting_resets_attributes(self): - ind = pd.MultiIndex.from_arrays([ - ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] - ]) - assert ind.is_monotonic - ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) - # if this fails, probably didn't reset the cache correctly. 
- assert not ind.is_monotonic - - def test_is_monotonic_increasing(self): - i = MultiIndex.from_product([np.arange(10), - np.arange(10)], names=['one', 'two']) - assert i.is_monotonic - assert i._is_strictly_monotonic_increasing - assert Index(i.values).is_monotonic - assert i._is_strictly_monotonic_increasing - - i = MultiIndex.from_product([np.arange(10, 0, -1), - np.arange(10)], names=['one', 'two']) - assert not i.is_monotonic - assert not i._is_strictly_monotonic_increasing - assert not Index(i.values).is_monotonic - assert not Index(i.values)._is_strictly_monotonic_increasing - - i = MultiIndex.from_product([np.arange(10), - np.arange(10, 0, -1)], - names=['one', 'two']) - assert not i.is_monotonic - assert not i._is_strictly_monotonic_increasing - assert not Index(i.values).is_monotonic - assert not Index(i.values)._is_strictly_monotonic_increasing - - i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']]) - assert not i.is_monotonic - assert not i._is_strictly_monotonic_increasing - assert not Index(i.values).is_monotonic - assert not Index(i.values)._is_strictly_monotonic_increasing - - # string ordering - i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - assert not i.is_monotonic - assert not Index(i.values).is_monotonic - assert not i._is_strictly_monotonic_increasing - assert not Index(i.values)._is_strictly_monotonic_increasing - - i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], - ['mom', 'next', 'zenith']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - assert i.is_monotonic - assert Index(i.values).is_monotonic - assert i._is_strictly_monotonic_increasing - assert Index(i.values)._is_strictly_monotonic_increasing - - # mixed levels, hits the TypeError - i = MultiIndex( - levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237', - 
'nl0000289783', - 'nl0000289965', 'nl0000301109']], - labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], - names=['household_id', 'asset_id']) - - assert not i.is_monotonic - assert not i._is_strictly_monotonic_increasing - - # empty - i = MultiIndex.from_arrays([[], []]) - assert i.is_monotonic - assert Index(i.values).is_monotonic - assert i._is_strictly_monotonic_increasing - assert Index(i.values)._is_strictly_monotonic_increasing - - def test_is_monotonic_decreasing(self): - i = MultiIndex.from_product([np.arange(9, -1, -1), - np.arange(9, -1, -1)], - names=['one', 'two']) - assert i.is_monotonic_decreasing - assert i._is_strictly_monotonic_decreasing - assert Index(i.values).is_monotonic_decreasing - assert i._is_strictly_monotonic_decreasing - - i = MultiIndex.from_product([np.arange(10), - np.arange(10, 0, -1)], - names=['one', 'two']) - assert not i.is_monotonic_decreasing - assert not i._is_strictly_monotonic_decreasing - assert not Index(i.values).is_monotonic_decreasing - assert not Index(i.values)._is_strictly_monotonic_decreasing - - i = MultiIndex.from_product([np.arange(10, 0, -1), - np.arange(10)], names=['one', 'two']) - assert not i.is_monotonic_decreasing - assert not i._is_strictly_monotonic_decreasing - assert not Index(i.values).is_monotonic_decreasing - assert not Index(i.values)._is_strictly_monotonic_decreasing - - i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']]) - assert not i.is_monotonic_decreasing - assert not i._is_strictly_monotonic_decreasing - assert not Index(i.values).is_monotonic_decreasing - assert not Index(i.values)._is_strictly_monotonic_decreasing - - # string ordering - i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], - ['three', 'two', 'one']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - assert not i.is_monotonic_decreasing - assert not Index(i.values).is_monotonic_decreasing - assert not i._is_strictly_monotonic_decreasing - assert 
not Index(i.values)._is_strictly_monotonic_decreasing - - i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'], - ['zenith', 'next', 'mom']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - assert i.is_monotonic_decreasing - assert Index(i.values).is_monotonic_decreasing - assert i._is_strictly_monotonic_decreasing - assert Index(i.values)._is_strictly_monotonic_decreasing - - # mixed levels, hits the TypeError - i = MultiIndex( - levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965', - 'nl0000289783', 'lu0197800237', - 'gb00b03mlx29']], - labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], - names=['household_id', 'asset_id']) - - assert not i.is_monotonic_decreasing - assert not i._is_strictly_monotonic_decreasing - - # empty - i = MultiIndex.from_arrays([[], []]) - assert i.is_monotonic_decreasing - assert Index(i.values).is_monotonic_decreasing - assert i._is_strictly_monotonic_decreasing - assert Index(i.values)._is_strictly_monotonic_decreasing - - def test_is_strictly_monotonic_increasing(self): - idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']], - labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) - assert idx.is_monotonic_increasing - assert not idx._is_strictly_monotonic_increasing - - def test_is_strictly_monotonic_decreasing(self): - idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']], - labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) - assert idx.is_monotonic_decreasing - assert not idx._is_strictly_monotonic_decreasing - - def test_reconstruct_sort(self): - - # starts off lexsorted & monotonic - mi = MultiIndex.from_arrays([ - ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] - ]) - assert mi.is_lexsorted() - assert mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert recons.is_lexsorted() - assert recons.is_monotonic - assert mi is recons - - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - # cannot convert to lexsorted - mi = 
pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), - ('x', 'b'), ('y', 'a'), ('z', 'b')], - names=['one', 'two']) - assert not mi.is_lexsorted() - assert not mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() - assert not recons.is_monotonic - - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - # cannot convert to lexsorted - mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], - labels=[[0, 1, 0, 2], [2, 0, 0, 1]], - names=['col1', 'col2']) - assert not mi.is_lexsorted() - assert not mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() - assert not recons.is_monotonic - - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - def test_reconstruct_remove_unused(self): - # xref to GH 2770 - df = DataFrame([['deleteMe', 1, 9], - ['keepMe', 2, 9], - ['keepMeToo', 3, 9]], - columns=['first', 'second', 'third']) - df2 = df.set_index(['first', 'second'], drop=False) - df2 = df2[df2['first'] != 'deleteMe'] - - # removed levels are there - expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], - [1, 2, 3]], - labels=[[1, 2], [1, 2]], - names=['first', 'second']) - result = df2.index - tm.assert_index_equal(result, expected) - - expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], - [2, 3]], - labels=[[0, 1], [0, 1]], - names=['first', 'second']) - result = df2.index.remove_unused_levels() - tm.assert_index_equal(result, expected) - - # idempotent - result2 = result.remove_unused_levels() - tm.assert_index_equal(result2, expected) - assert result2.is_(result) - - @pytest.mark.parametrize('level0', [['a', 'd', 'b'], - ['a', 'd', 'b', 'unused']]) - @pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'], - ['w', 'x', 'y', 'z', 'unused']]) - def test_remove_unused_nan(self, level0, level1): - # GH 18417 - mi = pd.MultiIndex(levels=[level0, level1], - labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) - - result = 
mi.remove_unused_levels() - tm.assert_index_equal(result, mi) - for level in 0, 1: - assert('unused' not in result.levels[level]) - - @pytest.mark.parametrize('first_type,second_type', [ - ('int64', 'int64'), - ('datetime64[D]', 'str')]) - def test_remove_unused_levels_large(self, first_type, second_type): - # GH16556 - - # because tests should be deterministic (and this test in particular - # checks that levels are removed, which is not the case for every - # random input): - rng = np.random.RandomState(4) # seed is arbitrary value that works - - size = 1 << 16 - df = DataFrame(dict( - first=rng.randint(0, 1 << 13, size).astype(first_type), - second=rng.randint(0, 1 << 10, size).astype(second_type), - third=rng.rand(size))) - df = df.groupby(['first', 'second']).sum() - df = df[df.third < 0.1] - - result = df.index.remove_unused_levels() - assert len(result.levels[0]) < len(df.index.levels[0]) - assert len(result.levels[1]) < len(df.index.levels[1]) - assert result.equals(df.index) - - expected = df.reset_index().set_index(['first', 'second']).index - tm.assert_index_equal(result, expected) - - def test_isin(self): - values = [('foo', 2), ('bar', 3), ('quux', 4)] - - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - result = idx.isin(values) - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - # empty, return dtype bool - idx = MultiIndex.from_arrays([[], []]) - result = idx.isin(values) - assert len(result) == 0 - assert result.dtype == np.bool_ - - @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") - def test_isin_nan_not_pypy(self): - idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) - tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - np.array([False, False])) - tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - np.array([False, False])) - - @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") - def 
test_isin_nan_pypy(self): - idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) - tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - np.array([False, True])) - tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - np.array([False, True])) - - def test_isin_level_kwarg(self): - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - - vals_0 = ['foo', 'bar', 'quux'] - vals_1 = [2, 3, 10] - - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) - - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) - - pytest.raises(IndexError, idx.isin, vals_0, level=5) - pytest.raises(IndexError, idx.isin, vals_0, level=-5) - - pytest.raises(KeyError, idx.isin, vals_0, level=1.0) - pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) - pytest.raises(KeyError, idx.isin, vals_1, level='A') - - idx.names = ['A', 'B'] - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) - - pytest.raises(KeyError, idx.isin, vals_1, level='C') - - def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): - # GH6552 - idx = self.index.copy() - target = idx.copy() - idx.names = target.names = [None, None] - - other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) - - # list & ndarray cases - assert idx.reindex([])[0].names == [None, None] - assert idx.reindex(np.array([]))[0].names == [None, None] - assert idx.reindex(target.tolist())[0].names == [None, None] - assert idx.reindex(target.values)[0].names == [None, None] - assert idx.reindex(other_dtype.tolist())[0].names == [None, None] - assert idx.reindex(other_dtype.values)[0].names == [None, None] - - idx.names = ['foo', 'bar'] - assert idx.reindex([])[0].names == ['foo', 'bar'] - assert 
idx.reindex(np.array([]))[0].names == ['foo', 'bar'] - assert idx.reindex(target.tolist())[0].names == ['foo', 'bar'] - assert idx.reindex(target.values)[0].names == ['foo', 'bar'] - assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar'] - assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar'] - - def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): - # GH7774 - idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], - names=['foo', 'bar']) - assert idx.reindex([], level=0)[0].names == ['foo', 'bar'] - assert idx.reindex([], level=1)[0].names == ['foo', 'bar'] - - def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): - # GH7774 - idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 - assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ - - def test_groupby(self): - groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2])) - labels = self.index.get_values().tolist() - exp = {1: labels[:3], 2: labels[3:]} - tm.assert_dict_equal(groups, exp) - - # GH5620 - groups = self.index.groupby(self.index) - exp = {key: [key] for key in self.index} - tm.assert_dict_equal(groups, exp) - - def test_index_name_retained(self): - # GH9857 - result = pd.DataFrame({'x': [1, 2, 6], - 'y': [2, 2, 8], - 'z': [-5, 0, 5]}) - result = result.set_index('z') - result.loc[10] = [9, 10] - df_expected = pd.DataFrame({'x': [1, 2, 6, 9], - 'y': [2, 2, 8, 10], - 'z': [-5, 0, 5, 10]}) - df_expected = df_expected.set_index('z') - tm.assert_frame_equal(result, df_expected) - - def test_equals_operator(self): - # GH9785 - assert (self.index == self.index).all() - - def test_large_multiindex_error(self): - # GH12527 - df_below_1000000 = pd.DataFrame( - 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]), - columns=['dest']) - with pytest.raises(KeyError): - df_below_1000000.loc[(-1, 0), 'dest'] - with pytest.raises(KeyError): - 
df_below_1000000.loc[(3, 0), 'dest'] - df_above_1000000 = pd.DataFrame( - 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), - columns=['dest']) - with pytest.raises(KeyError): - df_above_1000000.loc[(-1, 0), 'dest'] - with pytest.raises(KeyError): - df_above_1000000.loc[(3, 0), 'dest'] - - def test_partial_string_timestamp_multiindex(self): - # GH10331 - dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H') - abc = ['a', 'b', 'c'] - ix = pd.MultiIndex.from_product([dr, abc]) - df = pd.DataFrame({'c1': range(0, 15)}, index=ix) - idx = pd.IndexSlice - - # c1 - # 2016-01-01 00:00:00 a 0 - # b 1 - # c 2 - # 2016-01-01 12:00:00 a 3 - # b 4 - # c 5 - # 2016-01-02 00:00:00 a 6 - # b 7 - # c 8 - # 2016-01-02 12:00:00 a 9 - # b 10 - # c 11 - # 2016-01-03 00:00:00 a 12 - # b 13 - # c 14 - - # partial string matching on a single index - for df_swap in (df.swaplevel(), - df.swaplevel(0), - df.swaplevel(0, 1)): - df_swap = df_swap.sort_index() - just_a = df_swap.loc['a'] - result = just_a.loc['2016-01-01'] - expected = df.loc[idx[:, 'a'], :].iloc[0:2] - expected.index = expected.index.droplevel(1) - tm.assert_frame_equal(result, expected) - - # indexing with IndexSlice - result = df.loc[idx['2016-01-01':'2016-02-01', :], :] - expected = df - tm.assert_frame_equal(result, expected) - - # match on secondary index - result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :] - expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] - tm.assert_frame_equal(result, expected) - - # Even though this syntax works on a single index, this is somewhat - # ambiguous and we don't want to extend this behavior forward to work - # in multi-indexes. This would amount to selecting a scalar from a - # column. 
- with pytest.raises(KeyError): - df['2016-01-01'] - - # partial string match on year only - result = df.loc['2016'] - expected = df - tm.assert_frame_equal(result, expected) - - # partial string match on date - result = df.loc['2016-01-01'] - expected = df.iloc[0:6] - tm.assert_frame_equal(result, expected) - - # partial string match on date and hour, from middle - result = df.loc['2016-01-02 12'] - expected = df.iloc[9:12] - tm.assert_frame_equal(result, expected) - - # partial string match on secondary index - result = df_swap.loc[idx[:, '2016-01-02'], :] - expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] - tm.assert_frame_equal(result, expected) - - # tuple selector with partial string match on date - result = df.loc[('2016-01-01', 'a'), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # Slicing date on first level should break (of course) - with pytest.raises(KeyError): - df_swap.loc['2016-01-01'] - - # GH12685 (partial string with daily resolution or below) - dr = date_range('2013-01-01', periods=100, freq='D') - ix = MultiIndex.from_product([dr, ['a', 'b']]) - df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix) - - result = df.loc[idx['2013-03':'2013-03', :], :] - expected = df.iloc[118:180] - tm.assert_frame_equal(result, expected) - - def test_rangeindex_fallback_coercion_bug(self): - # GH 12893 - foo = pd.DataFrame(np.arange(100).reshape((10, 10))) - bar = pd.DataFrame(np.arange(100).reshape((10, 10))) - df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1) - df.index.names = ['fizz', 'buzz'] - - str(df) - expected = pd.DataFrame({'bar': np.arange(100), - 'foo': np.arange(100)}, - index=pd.MultiIndex.from_product( - [range(10), range(10)], - names=['fizz', 'buzz'])) - tm.assert_frame_equal(df, expected, check_like=True) - - result = df.index.get_level_values('fizz') - expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10) - tm.assert_index_equal(result, expected) - - result = 
df.index.get_level_values('buzz') - expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz') - tm.assert_index_equal(result, expected) - - def test_dropna(self): - # GH 6194 - idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5], - [1, 2, np.nan, np.nan, 5], - ['a', 'b', 'c', np.nan, 'e']]) - - exp = pd.MultiIndex.from_arrays([[1, 5], - [1, 5], - ['a', 'e']]) - tm.assert_index_equal(idx.dropna(), exp) - tm.assert_index_equal(idx.dropna(how='any'), exp) - - exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], - [1, 2, np.nan, 5], - ['a', 'b', 'c', 'e']]) - tm.assert_index_equal(idx.dropna(how='all'), exp) - - msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): - idx.dropna(how='xxx') - - def test_unsortedindex(self): - # GH 11897 - mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), - ('x', 'b'), ('y', 'a'), ('z', 'b')], - names=['one', 'two']) - df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, - columns=['one', 'two']) - - # GH 16734: not sorted, but no real slicing - result = df.loc(axis=0)['z', 'a'] - expected = df.iloc[0] - tm.assert_series_equal(result, expected) - - with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', slice('a')] - df.sort_index(inplace=True) - assert len(df.loc(axis=0)['z', :]) == 2 - - with pytest.raises(KeyError): - df.loc(axis=0)['q', :] - - def test_unsortedindex_doc_examples(self): - # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa - dfm = DataFrame({'jim': [0, 0, 1, 1], - 'joe': ['x', 'x', 'z', 'y'], - 'jolie': np.random.rand(4)}) - - dfm = dfm.set_index(['jim', 'joe']) - with tm.assert_produces_warning(PerformanceWarning): - dfm.loc[(1, 'z')] - - with pytest.raises(UnsortedIndexError): - dfm.loc[(0, 'y'):(1, 'z')] - - assert not dfm.index.is_lexsorted() - assert dfm.index.lexsort_depth == 1 - - # sort it - dfm = dfm.sort_index() - dfm.loc[(1, 'z')] - dfm.loc[(0, 'y'):(1, 'z')] - - assert dfm.index.is_lexsorted() - 
assert dfm.index.lexsort_depth == 2 - - def test_tuples_with_name_string(self): - # GH 15110 and GH 14848 - - li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] - with pytest.raises(ValueError): - pd.Index(li, name='abc') - with pytest.raises(ValueError): - pd.Index(li, name='a') - - def test_nan_stays_float(self): - - # GH 7031 - idx0 = pd.MultiIndex(levels=[["A", "B"], []], - labels=[[1, 0], [-1, -1]], - names=[0, 1]) - idx1 = pd.MultiIndex(levels=[["C"], ["D"]], - labels=[[0], [0]], - names=[0, 1]) - idxm = idx0.join(idx1, how='outer') - assert pd.isna(idx0.get_level_values(1)).all() - # the following failed in 0.14.1 - assert pd.isna(idxm.get_level_values(1)[:-1]).all() - - df0 = pd.DataFrame([[1, 2]], index=idx0) - df1 = pd.DataFrame([[3, 4]], index=idx1) - dfm = df0 - df1 - assert pd.isna(df0.index.get_level_values(1)).all() - # the following failed in 0.14.1 - assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() - - def test_million_record_attribute_error(self): - # GH 18165 - r = list(range(1000000)) - df = pd.DataFrame({'a': r, 'b': r}, - index=pd.MultiIndex.from_tuples([(x, x) for x in r])) - - with tm.assert_raises_regex(AttributeError, - "'Series' object has no attribute 'foo'"): - df['a'].foo() - - def test_duplicate_multiindex_labels(self): - # GH 17464 - # Make sure that a MultiIndex with duplicate levels throws a ValueError - with pytest.raises(ValueError): - ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) - - # And that using set_levels with duplicate levels fails - ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], - [1, 2, 1, 2, 3]]) - with pytest.raises(ValueError): - ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], - inplace=True) - - def test_multiindex_compare(self): - # GH 21149 - # Ensure comparison operations for MultiIndex with nlevels == 1 - # behave consistently with those for MultiIndex with nlevels > 1 - - midx = pd.MultiIndex.from_product([[0, 1]]) - - # Equality self-test: MultiIndex object vs self - 
expected = pd.Series([True, True]) - result = pd.Series(midx == midx) - tm.assert_series_equal(result, expected) - - # Greater than comparison: MultiIndex object vs self - expected = pd.Series([False, False]) - result = pd.Series(midx > midx) - tm.assert_series_equal(result, expected) From 5780b7ee029f61b47a7755a0d4848d233f8f592c Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Tue, 19 Jun 2018 22:47:49 -0500 Subject: [PATCH 2/7] addressing comments to pull request #21514 for GH issue #18644 --- pandas/tests/indexes/multi/test_base.py | 938 ------------------ pandas/tests/indexes/multi/test_compat.py | 144 +++ .../tests/indexes/multi/test_constructor.py | 31 + pandas/tests/indexes/multi/test_contains.py | 102 ++ pandas/tests/indexes/multi/test_conversion.py | 20 +- pandas/tests/indexes/multi/test_copy.py | 92 ++ .../tests/indexes/multi/test_equivalence.py | 244 +++++ pandas/tests/indexes/multi/test_format.py | 15 + pandas/tests/indexes/multi/test_get_set.py | 199 ++++ pandas/tests/indexes/multi/test_indexing.py | 264 +++-- pandas/tests/indexes/multi/test_integrity.py | 126 +-- pandas/tests/indexes/multi/test_join.py | 8 + pandas/tests/indexes/multi/test_monotonic.py | 52 +- pandas/tests/indexes/multi/test_operations.py | 423 +++----- pandas/tests/indexes/multi/test_reindex.py | 12 + pandas/tests/indexes/multi/test_set_ops.py | 283 ++++++ pandas/tests/indexes/multi/test_sorting.py | 46 +- 17 files changed, 1529 insertions(+), 1470 deletions(-) delete mode 100644 pandas/tests/indexes/multi/test_base.py create mode 100644 pandas/tests/indexes/multi/test_compat.py create mode 100644 pandas/tests/indexes/multi/test_equivalence.py create mode 100644 pandas/tests/indexes/multi/test_set_ops.py diff --git a/pandas/tests/indexes/multi/test_base.py b/pandas/tests/indexes/multi/test_base.py deleted file mode 100644 index de08c366e7bb9..0000000000000 --- a/pandas/tests/indexes/multi/test_base.py +++ /dev/null @@ -1,938 +0,0 @@ -# -*- coding: utf-8 -*- - -import numpy as np 
-import pandas as pd -import pandas.util.testing as tm -import pytest -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) -from pandas._libs.tslib import iNaT -from pandas.compat import PY3 -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin - - -def verify_pickle(indices): - unpickled = tm.round_trip_pickle(indices) - assert indices.equals(unpickled) - - -def test_pickle_compat_construction(_holder): - # this is testing for pickle compat - if _holder is None: - return - - # need an object to create with - pytest.raises(TypeError, _holder) - - -def test_to_series(_index): - # assert that we are creating a copy of the index - - idx = _index - s = idx.to_series() - assert s.values is not idx.values - assert s.index is not idx - assert s.name == idx.name - - -def test_to_series_with_arguments(_index): - # GH18699 - - # index kwarg - idx = _index - s = idx.to_series(index=idx) - - assert s.values is not idx.values - assert s.index is idx - assert s.name == idx.name - - # name kwarg - idx = _index - s = idx.to_series(name='__test') - - assert s.values is not idx.values - assert s.index is not idx - assert s.name != idx.name - - -def test_shift(_index): - - # GH8083 test the base class for shift - idx = _index - pytest.raises(NotImplementedError, idx.shift, 1) - pytest.raises(NotImplementedError, idx.shift, 1, 2) - - -def test_create_index_existing_name(_index): - - # GH11193, when an existing index is passed, and a new name is not - # specified, the new index should inherit the previous object name - expected = _index - if not isinstance(expected, MultiIndex): - expected.name = 'foo' - result = pd.Index(expected) - tm.assert_index_equal(result, expected) - - result = pd.Index(expected, name='bar') - expected.name = 'bar' - tm.assert_index_equal(result, 
expected) - else: - expected.names = ['foo', 'bar'] - result = pd.Index(expected) - tm.assert_index_equal( - result, Index(Index([('foo', 'one'), ('foo', 'two'), - ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], - dtype='object'), - names=['foo', 'bar'])) - - result = pd.Index(expected, names=['A', 'B']) - tm.assert_index_equal( - result, - Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - dtype='object'), names=['A', 'B'])) - - -def test_numeric_compat(_index): - - idx = _index - tm.assert_raises_regex(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assert_raises_regex(TypeError, "cannot perform __rmul__", - lambda: 1 * idx) - - div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) - div_err = div_err.replace(' __', ' __r') - tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) - tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", - lambda: 1 // idx) - - -def test_logical_compat(_index): - idx = _index - tm.assert_raises_regex(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assert_raises_regex(TypeError, 'cannot perform any', - lambda: idx.any()) - - -def test_boolean_context_compat(_index): - - # boolean context compat - idx = _index - - def f(): - if idx: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) - - -def test_reindex_base(_index): - idx = _index - expected = np.arange(idx.size, dtype=np.intp) - - actual = idx.get_indexer(idx) - tm.assert_numpy_array_equal(expected, actual) - - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') - - -def test_get_indexer_consistency(named_index): - # See GH 16819 - for name, index in named_index.items(): - if isinstance(index, IntervalIndex): - 
continue - - if index.is_unique or isinstance(index, CategoricalIndex): - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - else: - e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = index.get_indexer(index[0:2]) - - indexer, _ = index.get_indexer_non_unique(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - - -def test_ndarray_compat_properties(_index, _compat_props): - idx = _index - assert idx.T.equals(idx) - assert idx.transpose().equals(idx) - - values = idx.values - for prop in _compat_props: - assert getattr(idx, prop) == getattr(values, prop) - - # test for validity - idx.nbytes - idx.values.nbytes - - -def test_dtype_str(indices): - dtype = indices.dtype_str - assert isinstance(dtype, compat.string_types) - assert dtype == str(indices.dtype) - - -def test_repr_max_seq_item_setting(_index): - # GH10182 - idx = _index - idx = idx.repeat(50) - with pd.option_context("display.max_seq_items", None): - repr(idx) - assert '...' not in str(idx) - - -def test_wrong_number_names(indices): - def testit(ind): - ind.names = ["apple", "banana", "carrot"] - tm.assert_raises_regex(ValueError, "^Length", testit, indices) - - -def test_hash_error(indices): - index = indices - tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__, hash, indices) - - -def test_copy_name(named_index): - # gh-12309: Check that the "name" argument - # passed at initialization is honored. - - for name, index in compat.iteritems(named_index): - if isinstance(index, MultiIndex): - continue - - first = index.__class__(index, copy=True, name='mario') - second = first.__class__(first, copy=False) - - # Even though "copy=False", we want a new object. - assert first is not second - - # Not using tm.assert_index_equal() since names differ. 
- assert index.equals(first) - - assert first.name == 'mario' - assert second.name == 'mario' - - s1 = Series(2, index=first) - s2 = Series(3, index=second[:-1]) - - if not isinstance(index, CategoricalIndex): - # See gh-13365 - s3 = s1 * s2 - assert s3.index.name == 'mario' - - -def test_ensure_copied_data(named_index): - # Check the "copy" argument of each Index.__new__ is honoured - # GH12309 - for name, index in compat.iteritems(named_index): - init_kwargs = {} - if isinstance(index, PeriodIndex): - # Needs "freq" specification: - init_kwargs['freq'] = index.freq - elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): - # RangeIndex cannot be initialized from data - # MultiIndex and CategoricalIndex are tested separately - continue - - index_type = index.__class__ - result = index_type(index.values, copy=True, **init_kwargs) - tm.assert_index_equal(index, result) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='copy') - - if isinstance(index, PeriodIndex): - # .values an object array of Period, thus copied - result = index_type(ordinal=index.asi8, copy=False, - **init_kwargs) - tm.assert_numpy_array_equal(index._ndarray_values, - result._ndarray_values, - check_same='same') - elif isinstance(index, IntervalIndex): - # checked in test_interval.py - pass - else: - result = index_type(index.values, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='same') - tm.assert_numpy_array_equal(index._ndarray_values, - result._ndarray_values, - check_same='same') - - -def test_copy_and_deepcopy(indices): - from copy import copy, deepcopy - - if isinstance(indices, MultiIndex): - return - for func in (copy, deepcopy): - idx_copy = func(indices) - assert idx_copy is not indices - assert idx_copy.equals(indices) - - new_copy = indices.copy(deep=True, name="banana") - assert new_copy.name == "banana" - - -def test_unique_na(): - idx = pd.Index([2, np.nan, 2, 1], name='my_index') - expected = 
pd.Index([2, np.nan, 1], name='my_index') - result = idx.unique() - tm.assert_index_equal(result, expected) - - -def test_sort(indices): - pytest.raises(TypeError, indices.sort) - - -def test_mutability(indices): - if not len(indices): - return - pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) - - -def test_compat(indices): - assert indices.tolist() == list(indices) - - -def test_memory_usage(named_index): - for name, index in compat.iteritems(named_index): - result = index.memory_usage() - if len(index): - index.get_loc(index[0]) - result2 = index.memory_usage() - result3 = index.memory_usage(deep=True) - - # RangeIndex, IntervalIndex - # don't have engines - if not isinstance(index, (RangeIndex, IntervalIndex)): - assert result2 > result - - if index.inferred_type == 'object': - assert result3 > result2 - - else: - - # we report 0 for no-length - assert result == 0 - - -def test_numpy_argsort(named_index): - for k, ind in named_index.items(): - result = np.argsort(ind) - expected = ind.argsort() - tm.assert_numpy_array_equal(result, expected) - - # these are the only two types that perform - # pandas compatibility input validation - the - # rest already perform separate (or no) such - # validation via their 'values' attribute as - # defined in pandas.core.indexes/base.py - they - # cannot be changed at the moment due to - # backwards compatibility concerns - if isinstance(type(ind), (CategoricalIndex, RangeIndex)): - msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, - np.argsort, ind, axis=1) - - msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, kind='mergesort') - - msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, order=('a', 'b')) - - -def test_pickle(indices): - verify_pickle(indices) - original_name, indices.name = indices.name, 'foo' - verify_pickle(indices) - indices.name = original_name - - 
-def test_take(named_index): - indexer = [4, 3, 0, 2] - for k, ind in named_index.items(): - - # separate - if k in ['boolIndex', 'tuples', 'empty']: - continue - - result = ind.take(indexer) - expected = ind[indexer] - assert result.equals(expected) - - if not isinstance(ind, - (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # GH 10791 - with pytest.raises(AttributeError): - ind.freq - - -def test_take_invalid_kwargs(_index): - idx = _index - indices = [1, 2] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) - - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') - - -def test_setops_errorcases(named_index): - for name, idx in compat.iteritems(named_index): - # # non-iterable input - cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, - idx.symmetric_difference] - - for method in methods: - for case in cases: - tm.assert_raises_regex(TypeError, - "Input must be Index " - "or array-like", - method, case) - - -def test_intersection_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[:5] - second = idx[:3] - intersect = first.intersection(second) - - if isinstance(idx, CategoricalIndex): - pass - else: - assert tm.equalContents(intersect, second) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.intersection(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.intersection(case) - assert tm.equalContents(result, second) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a 
list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.intersection([1, 2, 3]) - - -def test_union_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[3:] - second = idx[:5] - everything = idx - union = first.union(second) - assert tm.equalContents(union, everything) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.union(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.union(case) - assert tm.equalContents(result, everything) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.union([1, 2, 3]) - - -def test_difference_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[2:] - second = idx[:4] - answer = idx[4:] - result = first.difference(second) - - if isinstance(idx, CategoricalIndex): - pass - else: - assert tm.equalContents(result, answer) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.difference(case) - elif isinstance(idx, CategoricalIndex): - pass - elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.sort_values().asi8, - answer.sort_values().asi8) - else: - result = first.difference(case) - assert tm.equalContents(result, answer) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.difference([1, 2, 3]) - - -def 
test_symmetric_difference(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[1:] - second = idx[:-1] - if isinstance(idx, CategoricalIndex): - pass - else: - answer = idx[[0, -1]] - result = first.symmetric_difference(second) - assert tm.equalContents(result, answer) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.symmetric_difference(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.symmetric_difference(case) - assert tm.equalContents(result, answer) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - first.symmetric_difference([1, 2, 3]) - - -def test_insert_base(named_index): - - for name, idx in compat.iteritems(named_index): - result = idx[1:4] - - if not len(idx): - continue - - # test 0th element - assert idx[0:4].equals(result.insert(0, idx[0])) - - -def test_delete_base(named_index): - - for name, idx in compat.iteritems(named_index): - - if not len(idx): - continue - - if isinstance(idx, RangeIndex): - # tested in class - continue - - expected = idx[1:] - result = idx.delete(0) - assert result.equals(expected) - assert result.name == expected.name - - expected = idx[:-1] - result = idx.delete(-1) - assert result.equals(expected) - assert result.name == expected.name - - with pytest.raises((IndexError, ValueError)): - # either depending on numpy version - result = idx.delete(len(idx)) - - -def test_equals(named_index): - - for name, idx in compat.iteritems(named_index): - assert idx.equals(idx) - assert idx.equals(idx.copy()) - assert idx.equals(idx.astype(object)) - - assert not idx.equals(list(idx)) - assert not idx.equals(np.array(idx)) - - # Cannot pass in non-int64 dtype to RangeIndex - if 
not isinstance(idx, RangeIndex): - same_values = Index(idx, dtype=object) - assert idx.equals(same_values) - assert same_values.equals(idx) - - if idx.nlevels == 1: - # do not test MultiIndex - assert not idx.equals(pd.Series(idx)) - - -def test_equals_op(_index): - # GH9947, GH10637 - index_a = _index - if isinstance(index_a, PeriodIndex): - return - - n = len(index_a) - index_b = index_a[0:-1] - index_c = index_a[0:-1].append(index_a[-2:-1]) - index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == index_b - expected1 = np.array([True] * n) - expected2 = np.array([True] * (n - 1) + [False]) - tm.assert_numpy_array_equal(index_a == index_a, expected1) - tm.assert_numpy_array_equal(index_a == index_c, expected2) - - # test comparisons with numpy arrays - array_a = np.array(index_a) - array_b = np.array(index_a[0:-1]) - array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) - array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == array_b - tm.assert_numpy_array_equal(index_a == array_a, expected1) - tm.assert_numpy_array_equal(index_a == array_c, expected2) - - # test comparisons with Series - series_a = Series(array_a) - series_b = Series(array_b) - series_c = Series(array_c) - series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == series_b - - tm.assert_numpy_array_equal(index_a == series_a, expected1) - tm.assert_numpy_array_equal(index_a == series_c, expected2) - - # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): - index_a == array_d - msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): - series_a == series_d - with 
tm.assert_raises_regex(ValueError, "Lengths must match"): - series_a == array_d - - # comparing with a scalar should broadcast; note that we are excluding - # MultiIndex because in this case each item in the index is a tuple of - # length 2, and therefore is considered an array of length 2 in the - # comparison instead of a scalar - if not isinstance(index_a, MultiIndex): - expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) - # assuming the 2nd to last item is unique in the data - item = index_a[-2] - tm.assert_numpy_array_equal(index_a == item, expected3) - tm.assert_series_equal(series_a == item, Series(expected3)) - - -def test_numpy_ufuncs(named_index): - # test ufuncs of numpy 1.9.2. see: - # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - - for name, idx in compat.iteritems(named_index): - for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, - np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, - np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, - np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, - np.rad2deg]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - # PeriodIndex behavior should be changed in future version - with pytest.raises(Exception): - with np.errstate(all='ignore'): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # coerces to float (e.g. 
np.sin) - with np.errstate(all='ignore'): - result = func(idx) - exp = Index(func(idx.values), name=idx.name) - - tm.assert_index_equal(result, exp) - assert isinstance(result, pd.Float64Index) - else: - # raise AttributeError or TypeError - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - with np.errstate(all='ignore'): - func(idx) - - for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - with pytest.raises(Exception): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # Results in bool array - result = func(idx) - assert isinstance(result, np.ndarray) - assert not isinstance(result, Index) - else: - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - func(idx) - - -def test_hasnans_isnans(named_index): - # GH 11343, added tests for hasnans / isnans - for name, index in named_index.items(): - if isinstance(index, MultiIndex): - pass - else: - idx = index.copy() - - # cases in indices doesn't include NaN - expected = np.array([False] * len(idx), dtype=bool) - tm.assert_numpy_array_equal(idx._isnan, expected) - assert not idx.hasnans - - idx = index.copy() - values = idx.values - - if len(index) == 0: - continue - elif isinstance(index, DatetimeIndexOpsMixin): - values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - tm.assert_numpy_array_equal(idx._isnan, expected) - assert idx.hasnans - - -def test_fillna(named_index): - # GH 11343 - for name, index in named_index.items(): - if len(index) == 0: - pass - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with 
tm.assert_raises_regex(NotImplementedError, msg): - idx.fillna(idx[0]) - else: - idx = index.copy() - result = idx.fillna(idx[0]) - tm.assert_index_equal(result, idx) - assert result is not idx - - msg = "'value' must be a scalar, passed: " - with tm.assert_raises_regex(TypeError, msg): - idx.fillna([idx[0]]) - - idx = index.copy() - values = idx.values - - if isinstance(index, DatetimeIndexOpsMixin): - values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - tm.assert_numpy_array_equal(idx._isnan, expected) - assert idx.hasnans - - -def test_nulls(named_index): - # this is really a smoke test for the methods - # as these are adequately tested for function elsewhere - - for name, index in named_index.items(): - if len(index) == 0: - tm.assert_numpy_array_equal( - index.isna(), np.array([], dtype=bool)) - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): - idx.isna() - else: - - if not index.hasnans: - tm.assert_numpy_array_equal( - index.isna(), np.zeros(len(index), dtype=bool)) - tm.assert_numpy_array_equal( - index.notna(), np.ones(len(index), dtype=bool)) - else: - result = isna(index) - tm.assert_numpy_array_equal(index.isna(), result) - tm.assert_numpy_array_equal(index.notna(), ~result) - - -def test_empty(_index): - # GH 15270 - index = _index - assert not index.empty - assert index[:0].empty - - -def test_join_self_unique(_index, join_type): - index = _index - if index.is_unique: - joined = index.join(index, how=join_type) - assert (index == joined).all() - - -def test_searchsorted_monotonic(indices): - # GH17271 - # not implemented for tuple searches in MultiIndex - # or Intervals searches in 
IntervalIndex - if isinstance(indices, (MultiIndex, IntervalIndex)): - return - - # nothing to test if the index is empty - if indices.empty: - return - value = indices[0] - - # determine the expected results (handle dupes for 'right') - expected_left, expected_right = 0, (indices == value).argmin() - if expected_right == 0: - # all values are the same, expected_right should be length - expected_right = len(indices) - - # test _searchsorted_monotonic in all cases - # test searchsorted only for increasing - if indices.is_monotonic_increasing: - ssm_left = indices._searchsorted_monotonic(value, side='left') - assert expected_left == ssm_left - - ssm_right = indices._searchsorted_monotonic(value, side='right') - assert expected_right == ssm_right - - ss_left = indices.searchsorted(value, side='left') - assert expected_left == ss_left - - ss_right = indices.searchsorted(value, side='right') - assert expected_right == ss_right - - elif indices.is_monotonic_decreasing: - ssm_left = indices._searchsorted_monotonic(value, side='left') - assert expected_left == ssm_left - - ssm_right = indices._searchsorted_monotonic(value, side='right') - assert expected_right == ssm_right - - else: - # non-monotonic should raise. 
- with pytest.raises(ValueError): - indices._searchsorted_monotonic(value, side='left') - - -def test_map(_index): - # callable - index = _index - - # we don't infer UInt64 - if isinstance(index, pd.UInt64Index): - expected = index.astype('int64') - else: - expected = index - - result = index.map(lambda x: x) - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize( - "mapper", - [ - lambda values, index: {i: e for e, i in zip(values, index)}, - lambda values, index: pd.Series(values, index)]) -def test_map_dictlike(_index, mapper): - - index = _index - if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): - pytest.skip("skipping tests for {}".format(type(index))) - - identity = mapper(index.values, index) - - # we don't infer to UInt64 for a dict - if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): - expected = index.astype('int64') - else: - expected = index - - result = index.map(identity) - tm.assert_index_equal(result, expected) - - # empty mappable - expected = pd.Index([np.nan] * len(index)) - result = index.map(mapper(expected, index)) - tm.assert_index_equal(result, expected) - - -def test_putmask_with_wrong_mask(_index): - # GH18368 - index = _index - - with pytest.raises(ValueError): - index.putmask(np.ones(len(index) + 1, np.bool), 1) - - with pytest.raises(ValueError): - index.putmask(np.ones(len(index) - 1, np.bool), 1) - - with pytest.raises(ValueError): - index.putmask('foo', 1) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py new file mode 100644 index 0000000000000..511bc335a705a --- /dev/null +++ b/pandas/tests/indexes/multi/test_compat.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- + + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, 
compat, + isna) +from pandas._libs.tslib import iNaT +from pandas.compat import PY3 +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.compat import PY3, PYPY, lrange, lzip, range, u, long +import numpy as np + + + +def test_numeric_compat(_index): + + idx = _index + tm.assert_raises_regex(TypeError, "cannot perform __mul__", + lambda: idx * 1) + tm.assert_raises_regex(TypeError, "cannot perform __rmul__", + lambda: 1 * idx) + + div_err = "cannot perform __truediv__" if PY3 \ + else "cannot perform __div__" + tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) + div_err = div_err.replace(' __', ' __r') + tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) + tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", + lambda: idx // 1) + tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", + lambda: 1 // idx) + + +def test_logical_compat(_index): + idx = _index + tm.assert_raises_regex(TypeError, 'cannot perform all', + lambda: idx.all()) + tm.assert_raises_regex(TypeError, 'cannot perform any', + lambda: idx.any()) + + +def test_boolean_context_compat(_index): + + # boolean context compat + idx = _index + + def f(): + if idx: + pass + + tm.assert_raises_regex(ValueError, 'The truth value of a', f) + + +def test_boolean_context_compat2(): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) + common = i1.intersection(i2) + + def f(): + if common: + pass + + tm.assert_raises_regex(ValueError, 'The truth value of a', f) + + +def test_inplace_mutation_resets_values(): + levels = [['a', 'b', 'c'], [4]] + levels2 = [[1, 2, 3], ['a']] + labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, labels=labels) + mi2 = MultiIndex(levels=levels2, labels=labels) + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + assert mi1._tuples is 
not None + + # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should kill _tuples + mi1.set_levels(levels2, inplace=True) + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + new_values = mi2.set_labels(labels2).values + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc + mi2.set_labels(labels2, inplace=True) + tm.assert_almost_equal(mi2.values, new_values) + + +def test_ndarray_compat_properties(_index, _compat_props): + idx = _index + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + for prop in _compat_props: + assert getattr(idx, prop) == getattr(values, prop) + + # test for validity + idx.nbytes + idx.values.nbytes + + +def test_compat(indices): + assert indices.tolist() == list(indices) + +def test_pickle_compat_construction(_holder): + # this is testing for pickle compat + if _holder is None: + return + + # need an object to create with + pytest.raises(TypeError, _holder) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index f4c0552578680..6e576c1c7e043 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -476,3 +476,34 @@ def test_from_product_iterator(): with tm.assert_raises_regex( TypeError, "Input must be a list / sequence of 
iterables."): MultiIndex.from_product(0) + + +def test_create_index_existing_name(_index): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = _index + index.names = ['foo', 'bar'] + result = pd.Index(index) + tm.assert_index_equal( + result, Index(Index([('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object'), + names=['foo', 'bar'])) + + result = pd.Index(index, names=['A', 'B']) + tm.assert_index_equal( + result, + Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], + dtype='object'), names=['A', 'B'])) + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + with pytest.raises(ValueError): + pd.Index(li, name='abc') + with pytest.raises(ValueError): + pd.Index(li, name='a') diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index 156ec47a09a38..44edc3f82249f 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -2,6 +2,10 @@ import pandas as pd from pandas import MultiIndex +import pytest +from pandas.compat import PY3, PYPY, lrange, lzip, range, u +import numpy as np +import pandas.util.testing as tm def test_contains_top_level(): @@ -25,3 +29,101 @@ def test_contains(_index): assert ('foo', 'two') in _index assert ('bar', 'two') not in _index assert None not in _index + + +@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + + +def test_isin(): + values = [('foo', 2), ('bar', 3), ('quux', 4)] + + idx 
= MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + assert len(result) == 0 + assert result.dtype == np.bool_ + + +@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") +def test_isin_nan_not_pypy(): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, False])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, False])) + + +def test_isin_level_kwarg(): + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + + vals_0 = ['foo', 'bar', 'quux'] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + pytest.raises(IndexError, idx.isin, vals_0, level=5) + pytest.raises(IndexError, idx.isin, vals_0, level=-5) + + pytest.raises(KeyError, idx.isin, vals_0, level=1.0) + pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) + pytest.raises(KeyError, idx.isin, vals_1, level='A') + + idx.names = ['A', 'B'] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) + + pytest.raises(KeyError, idx.isin, vals_1, level='C') + + +def test_hasnans_isnans(named_index): + # GH 11343, added tests for hasnans / isnans + for name, index in named_index.items(): + if isinstance(index, MultiIndex): + pass + else: + idx = index.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(idx), 
dtype=bool) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert not idx.hasnans + + idx = index.copy() + values = idx.values + + if len(index) == 0: + continue + elif isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index dbf575129e7c2..318ebb660a6b6 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -7,20 +7,9 @@ import pytest from pandas import DataFrame, MultiIndex, date_range from pandas.compat import PY3, range - from pandas.util.testing import assert_almost_equal -def test_tuples_with_name_string(): - # GH 15110 and GH 14848 - - li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] - with pytest.raises(ValueError): - pd.Index(li, name='abc') - with pytest.raises(ValueError): - pd.Index(li, name='a') - - def test_tolist(_index): result = _index.tolist() exp = list(_index.values) @@ -150,3 +139,12 @@ def test_roundtrip_pickle_with_tz(): ], names=['one', 'two', 'three']) unpickled = tm.round_trip_pickle(index) assert index.equal_levels(unpickled) + + +def test_pickle(indices): + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + original_name, indices.name = indices.name, 'foo' + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + indices.name = original_name diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 097adc25da86c..1d0f8cb6160d6 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ 
b/pandas/tests/indexes/multi/test_copy.py @@ -1,7 +1,18 @@ # -*- coding: utf-8 -*- +import numpy as np +import pandas as pd import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) +from pandas._libs.tslib import iNaT +from pandas.compat import PY3 +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin def assert_multiindex_copied(copy, original): @@ -36,3 +47,84 @@ def test_shallow_copy(_index): def test_view(_index): i_view = _index.view() assert_multiindex_copied(i_view, _index) + + +def test_copy_name(named_index): + # gh-12309: Check that the "name" argument + # passed at initialization is honored. + + for name, index in compat.iteritems(named_index): + if isinstance(index, MultiIndex): + continue + + first = index.__class__(index, copy=True, name='mario') + second = first.__class__(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + + # Not using tm.assert_index_equal() since names differ. 
+ assert index.equals(first) + + assert first.name == 'mario' + assert second.name == 'mario' + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + if not isinstance(index, CategoricalIndex): + # See gh-13365 + s3 = s1 * s2 + assert s3.index.name == 'mario' + + +def test_ensure_copied_data(named_index): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + for name, index in compat.iteritems(named_index): + init_kwargs = {} + if isinstance(index, PeriodIndex): + # Needs "freq" specification: + init_kwargs['freq'] = index.freq + elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + continue + + index_type = index.__class__ + result = index_type(index.values, copy=True, **init_kwargs) + tm.assert_index_equal(index, result) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='copy') + + if isinstance(index, PeriodIndex): + # .values an object array of Period, thus copied + result = index_type(ordinal=index.asi8, copy=False, + **init_kwargs) + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, + check_same='same') + elif isinstance(index, IntervalIndex): + # checked in test_interval.py + pass + else: + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, + check_same='same') + + +def test_copy_and_deepcopy(indices): + from copy import copy, deepcopy + + if isinstance(indices, MultiIndex): + return + for func in (copy, deepcopy): + idx_copy = func(indices) + assert idx_copy is not indices + assert idx_copy.equals(indices) + + new_copy = indices.copy(deep=True, name="banana") + assert new_copy.name == "banana" diff --git a/pandas/tests/indexes/multi/test_equivalence.py 
b/pandas/tests/indexes/multi/test_equivalence.py new file mode 100644 index 0000000000000..179a214d6ba21 --- /dev/null +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -0,0 +1,244 @@ +# -*- coding: utf-8 -*- + +import warnings +from itertools import product + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, + Index, Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + date_range, isna, period_range) +from pandas._libs.tslib import iNaT +from pandas.compat import PY3, PYPY, lrange, lzip, range, u +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas.util.testing import assert_copy + + +def test_equals(named_index): + + for name, idx in compat.iteritems(named_index): + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + + assert not idx.equals(list(idx)) + assert not idx.equals(np.array(idx)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(idx, RangeIndex): + same_values = Index(idx, dtype=object) + assert idx.equals(same_values) + assert same_values.equals(idx) + + if idx.nlevels == 1: + # do not test MultiIndex + assert not idx.equals(pd.Series(idx)) + + +def test_equals_op(_index): + # GH9947, GH10637 + index_a = _index + if isinstance(index_a, PeriodIndex): + return + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + 
# test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == index_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with tm.assert_raises_regex(ValueError, msg): + series_a == series_d + with tm.assert_raises_regex(ValueError, "Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_equals_multi(_index): + assert _index.equals(_index) + assert not _index.equals(_index.values) + assert 
_index.equals(Index(_index.values)) + + assert _index.equal_levels(_index) + assert not _index.equals(_index[:-1]) + assert not _index.equals(_index[-1]) + + # different number of levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3]) + minor_labels = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not _index.equals(index) + assert not _index.equal_levels(index) + + # some of the labels are different + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + assert not _index.equals(index) + + +def test_identical(_index): + mi = _index.copy() + mi2 = _index.copy() + assert mi.identical(mi2) + + mi = mi.set_names(['new1', 'new2']) + assert mi.equals(mi2) + assert not mi.identical(mi2) + + mi2 = mi2.set_names(['new1', 'new2']) + assert mi.identical(mi2) + + mi3 = Index(mi.tolist(), names=mi.names) + mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) + assert mi.identical(mi3) + assert not mi.identical(mi4) + assert mi.equals(mi4) + + +def test_equals_operator(_index): + # GH9785 + assert (_index == _index).all() + + +def test_equals_missing_values(): + # make sure take is not using -1 + i = pd.MultiIndex.from_tuples([(0, pd.NaT), + (0, pd.Timestamp('20130101'))]) + result = i[0:1].equals(i[0]) + assert not result + 
result = i[1:2].equals(i[1]) + assert not result + + +def test_is_(): + mi = MultiIndex.from_tuples(lzip(range(10), range(10))) + assert mi.is_(mi) + assert mi.is_(mi.view()) + assert mi.is_(mi.view().view().view().view()) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + assert mi2.is_(mi) + assert mi.is_(mi2) + + assert mi.is_(mi.set_names(["C", "D"])) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + assert mi.is_(mi2) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([lrange(10), lrange(10)]) + assert not mi3.is_(mi2) + # shouldn't change + assert mi2.is_(mi) + mi4 = mi3.view() + + # GH 17464 - Remove duplicate MultiIndex levels + mi4.set_levels([lrange(10), lrange(10)], inplace=True) + assert not mi4.is_(mi3) + mi5 = mi.view() + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_is_all_dates(_index): + assert not _index.is_all_dates + + +def test_is_numeric(_index): + # MultiIndex is never numeric + assert not _index.is_numeric() + + +def test_nulls(named_index): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + + for name, index in named_index.items(): + if len(index) == 0: + tm.assert_numpy_array_equal( + index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() + else: + + if not index.hasnans: + tm.assert_numpy_array_equal( + index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 97fc343f01a5f..e06aec26b9105 100644 --- 
a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -9,6 +9,12 @@ from pandas.compat import PY3, range, u +def test_dtype_str(indices): + dtype = indices.dtype_str + assert isinstance(dtype, compat.string_types) + assert dtype == str(indices.dtype) + + def test_format(_index): _index.format() _index[:0].format() @@ -117,3 +123,12 @@ def test_bytestring_with_unicode(): bytes(idx) else: str(idx) + + +def test_repr_max_seq_item_setting(_index): + # GH10182 + idx = _index + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert '...' not in str(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index faeaf35592de4..5a270c019a4f2 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -7,6 +7,185 @@ import pytest from pandas import CategoricalIndex, Index, MultiIndex from pandas.compat import range +from pandas.compat import PY3, PYPY, lrange, lzip, range, u +from pandas.util.testing import assert_almost_equal +from pandas.core.indexes.base import InvalidIndexError +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) + + +def test_get_loc(_index): + assert _index.get_loc(('foo', 'two')) == 1 + assert _index.get_loc(('baz', 'two')) == 3 + pytest.raises(KeyError, _index.get_loc, ('bar', 'two')) + pytest.raises(KeyError, _index.get_loc, 'quux') + + pytest.raises(NotImplementedError, _index.get_loc, 'foo', + method='nearest') + + # 3 levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + pytest.raises(KeyError, index.get_loc, (1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + 
+def test_get_loc_duplicates(): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert rs == xp + + +def test_get_loc_level(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + pytest.raises(KeyError, index.get_loc_level, (2, 2)) + + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) +def test_get_loc_nan(level, null_val): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, null_val], dtype=type(null_val)) + key[level] = null_val + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_missing_nan(): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + + +@pytest.mark.parametrize('dtype1', [int, float, bool, str]) +@pytest.mark.parametrize('dtype2', [int, float, bool, str]) +def 
test_get_loc_multiple_dtypes(dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) +def test_get_loc_implicit_cast(level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_cast_bool(): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) + + +def test_get_indexer(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], 
method='backfill') + assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(lrange(10) + lrange(10)) + idx2 = Index(lrange(20)) + + msg = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, msg): + idx1.get_indexer(idx2) + + +def test_get_indexer_nearest(): + midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='nearest') + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='pad', tolerance=2) def test_set_name_methods(_index, index_names): @@ -454,3 +633,23 @@ def test_get_unique_index(_index): result = idx._get_unique_index(dropna=dropna) assert result.unique tm.assert_index_equal(result, expected) + + +def test_get_indexer_consistency(named_index): + # See GH 16819 + for name, index in named_index.items(): + if isinstance(index, IntervalIndex): + continue + + if index.is_unique or isinstance(index, CategoricalIndex): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) + + indexer, _ = index.get_indexer_non_unique(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 716bcab00723b..4db3c43ceca62 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -11,6 +11,10 @@ from 
pandas.compat import lrange from pandas.core.indexes.base import InvalidIndexError from pandas.util.testing import assert_almost_equal +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) def test_slice_locs_partial(_index): @@ -107,173 +111,133 @@ def test_slice_locs_not_contained(): assert result == (0, len(index)) -def test_get_loc(_index): - assert _index.get_loc(('foo', 'two')) == 1 - assert _index.get_loc(('baz', 'two')) == 3 - pytest.raises(KeyError, _index.get_loc, ('bar', 'two')) - pytest.raises(KeyError, _index.get_loc, 'quux') - pytest.raises(NotImplementedError, _index.get_loc, 'foo', - method='nearest') +def test_to_series(_index): + # assert that we are creating a copy of the index - # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - pytest.raises(KeyError, index.get_loc, (1, 1)) - assert index.get_loc((2, 0)) == slice(3, 5) + idx = _index + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name -def test_get_loc_duplicates(): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - assert result == expected - # pytest.raises(Exception, index.get_loc, 2) +def test_to_series_with_arguments(_index): + # GH18699 - index = Index(['c', 'a', 'a', 'b', 'b']) - rs = index.get_loc('c') - xp = 0 - assert rs == xp + # index kwarg + idx = _index + s = idx.to_series(index=idx) + assert s.values is not idx.values + assert s.index is idx + assert s.name == idx.name -def test_get_loc_level(): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 
1, 0, 0, 1, 0])]) + # name kwarg + idx = _index + s = idx.to_series(name='__test') + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name + + +def test_shift(_index): + + # GH8083 test the base class for shift + idx = _index + pytest.raises(NotImplementedError, idx.shift, 1) + pytest.raises(NotImplementedError, idx.shift, 1, 2) + + +def test_insert_base(named_index): + + for name, idx in compat.iteritems(named_index): + result = idx[1:4] + + if not len(idx): + continue + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(named_index): + + for name, idx in compat.iteritems(named_index): + + if not len(idx): + continue + + if isinstance(idx, RangeIndex): + # tested in class + continue + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = index[expected].droplevel(0).droplevel(0) - assert loc == expected - assert new_index.equals(exp_index) + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - assert loc == expected - assert new_index is None - - pytest.raises(KeyError, index.get_loc_level, (2, 2)) - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( - [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) - result, new_index = index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - assert result == expected - assert new_index.equals(index.droplevel(0)) +def test_fillna(named_index): + # GH 11343 + for name, index in named_index.items(): + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for 
MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + msg = "'value' must be a scalar, passed: " + with tm.assert_raises_regex(TypeError, msg): + idx.fillna([idx[0]]) -@pytest.mark.parametrize('level', [0, 1]) -@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) -def test_get_loc_nan(level, null_val): - # GH 18485 : NaN in MultiIndex - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - levels[level] = np.array([0, null_val], dtype=type(null_val)) - key[level] = null_val - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 + idx = index.copy() + values = idx.values + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan -def test_get_loc_missing_nan(): - # GH 8569 - idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) - assert isinstance(idx.get_loc(1), slice) - pytest.raises(KeyError, idx.get_loc, 3) - pytest.raises(KeyError, idx.get_loc, np.nan) - pytest.raises(KeyError, idx.get_loc, [np.nan]) - - -@pytest.mark.parametrize('dtype1', [int, float, bool, str]) -@pytest.mark.parametrize('dtype2', [int, float, bool, str]) -def test_get_loc_multiple_dtypes(dtype1, dtype2): - # GH 18520 - levels = [np.array([0, 1]).astype(dtype1), - np.array([0, 1]).astype(dtype2)] - idx = pd.MultiIndex.from_product(levels) - assert idx.get_loc(idx[2]) == 2 - - -@pytest.mark.parametrize('level', [0, 1]) -@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) -def test_get_loc_implicit_cast(level, dtypes): - # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - lev_dtype, key_dtype = dtypes - levels[level] = np.array([0, 1], dtype=lev_dtype) - key[level] = key_dtype(1) - idx = MultiIndex.from_product(levels) 
- assert idx.get_loc(tuple(key)) == 3 - - -def test_get_loc_cast_bool(): - # GH 19086 : int is casted to bool, but not vice-versa - levels = [[False, True], np.arange(2, dtype='int64')] - idx = MultiIndex.from_product(levels) - - assert idx.get_loc((0, 1)) == 1 - assert idx.get_loc((1, 0)) == 2 - - pytest.raises(KeyError, idx.get_loc, (False, True)) - pytest.raises(KeyError, idx.get_loc, (True, False)) - - -def test_get_indexer(): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) - - r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - assert_almost_equal(r1, e1) + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2.values) - rexp1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, rexp1) - r1 = idx1.get_indexer([1, 2, 3]) - assert (r1 == [-1, -1, -1]).all() +def test_putmask_with_wrong_mask(_index): + # GH18368 
+ index = _index - # create index with duplicates - idx1 = Index(lrange(10) + lrange(10)) - idx2 = Index(lrange(20)) - - msg = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, msg): - idx1.get_indexer(idx2) + with pytest.raises(ValueError): + index.putmask(np.ones(len(index) + 1, np.bool), 1) + with pytest.raises(ValueError): + index.putmask(np.ones(len(index) - 1, np.bool), 1) -def test_get_indexer_nearest(): - midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='nearest') - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='pad', tolerance=2) + with pytest.raises(ValueError): + index.putmask('foo', 1) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index c52f71934bc6e..325fc8499b94c 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -2,19 +2,17 @@ import re - -import pytest - import numpy as np - import pandas as pd - -from pandas import DataFrame, MultiIndex, date_range +import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, + Index, Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + date_range, isna) from pandas.compat import long, lrange, range -from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike - -import pandas.util.testing as tm +from pandas.errors import PerformanceWarning, UnsortedIndexError def test_labels_dtypes(): @@ -133,16 +131,6 @@ def test_hash_collisions(): assert result == i -def test_equals_missing_values(): - # make sure take is not using -1 - i = pd.MultiIndex.from_tuples([(0, pd.NaT), - (0, pd.Timestamp('20130101'))]) - result = i[0:1].equals(i[0]) - 
assert not result - result = i[1:2].equals(i[1]) - assert not result - - def test_dims(): pass @@ -250,67 +238,6 @@ def test_metadata_immutable(_index): names[0] = names[0] -def test_boolean_context_compat2(): - - # boolean context compat - # GH7897 - i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) - common = i1.intersection(i2) - - def f(): - if common: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) - - -def test_inplace_mutation_resets_values(): - levels = [['a', 'b', 'c'], [4]] - levels2 = [[1, 2, 3], ['a']] - labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] - - mi1 = MultiIndex(levels=levels, labels=labels) - mi2 = MultiIndex(levels=levels2, labels=labels) - vals = mi1.values.copy() - vals2 = mi2.values.copy() - - assert mi1._tuples is not None - - # Make sure level setting works - new_vals = mi1.set_levels(levels2).values - tm.assert_almost_equal(vals2, new_vals) - - # Non-inplace doesn't kill _tuples [implementation detail] - tm.assert_almost_equal(mi1._tuples, vals) - - # ...and values is still same too - tm.assert_almost_equal(mi1.values, vals) - - # Inplace should kill _tuples - mi1.set_levels(levels2, inplace=True) - tm.assert_almost_equal(mi1.values, vals2) - - # Make sure label setting works too - labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.empty((6,), dtype=object) - exp_values[:] = [(long(1), 'a')] * 6 - - # Must be 1d array of tuples - assert exp_values.shape == (6,) - new_values = mi2.set_labels(labels2).values - - # Not inplace shouldn't change - tm.assert_almost_equal(mi2._tuples, vals2) - - # Should have correct values - tm.assert_almost_equal(exp_values, new_values) - - # ...and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) - tm.assert_almost_equal(mi2.values, new_values) - - def test_level_setting_resets_attributes(): ind = pd.MultiIndex.from_arrays([ ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] @@ 
-482,3 +409,42 @@ def test_unsortedindex_doc_examples(): assert dfm.index.is_lexsorted() assert dfm.index.lexsort_depth == 2 + +def test_hash_error(indices): + index = indices + tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__, hash, indices) + + +def test_mutability(indices): + if not len(indices): + return + pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) + + +def test_wrong_number_names(indices): + def testit(ind): + ind.names = ["apple", "banana", "carrot"] + tm.assert_raises_regex(ValueError, "^Length", testit, indices) + + +def test_memory_usage(named_index): + for name, index in compat.iteritems(named_index): + result = index.memory_usage() + if len(index): + index.get_loc(index[0]) + result2 = index.memory_usage() + result3 = index.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(index, (RangeIndex, IntervalIndex)): + assert result2 > result + + if index.inferred_type == 'object': + assert result3 > result2 + + else: + + # we report 0 for no-length + assert result == 0 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 5ea16d6a1243b..1bf8bc675f04e 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -87,3 +87,11 @@ def test_join_multi(): tm.assert_index_equal(jidx, midx) assert lidx is None tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_self_unique(_index, join_type): + index = _index + if index.is_unique: + joined = index.join(index, how=join_type) + assert (index == joined).all() + diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index 3baa420b0be77..7f65e13d4dd14 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -1,9 +1,13 @@ # -*- coding: utf-8 -*- +import pytest import numpy as np import pandas as pd -from pandas import Index, 
MultiIndex +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) def test_is_monotonic_increasing(): @@ -157,3 +161,49 @@ def test_is_strictly_monotonic_decreasing(): labels=[[0, 0, 1, 1], [0, 0, 0, 1]]) assert idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing + + +def test_searchsorted_monotonic(indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return + + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side='right') + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + else: + # non-monotonic should raise. 
+ with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index dc2df16f0bdbd..c6bf17b1071c2 100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -7,9 +7,13 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import DataFrame, Index, MultiIndex, date_range, period_range +from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, + Index, Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + date_range, isna, period_range) from pandas.compat import PYPY, lrange, lzip, range, u from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.util.testing import assert_copy @@ -17,125 +21,6 @@ def check_level_names(index, names): assert [level.name for level in index.levels] == list(names) -def test_difference(_index): - - first = _index - result = first.difference(_index[-3:]) - expected = MultiIndex.from_tuples(sorted(_index[:-3].values), - sortorder=0, - names=_index.names) - - assert isinstance(result, MultiIndex) - assert result.equals(expected) - assert result.names == _index.names - - # empty difference: reflexive - result = _index.difference(_index) - expected = _index[:0] - assert result.equals(expected) - assert result.names == _index.names - - # empty difference: superset - result = _index[-3:].difference(_index) - expected = _index[:0] - assert result.equals(expected) - assert result.names == _index.names - - # empty difference: degenerate - result = _index[:0].difference(_index) - expected = _index[:0] - assert result.equals(expected) - assert result.names == _index.names - - # names not the same - chunklet = _index[-3:] - chunklet.names = ['foo', 'baz'] - result = 
first.difference(chunklet) - assert result.names == (None, None) - - # empty, but non-equal - result = _index.difference(_index.sortlevel(1)[0]) - assert len(result) == 0 - - # raise Exception called with non-MultiIndex - result = first.difference(first.values) - assert result.equals(first[:0]) - - # name from empty array - result = first.difference([]) - assert first.equals(result) - assert first.names == result.names - - # name from non-empty array - result = first.difference([('foo', 'one')]) - expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( - 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) - expected.names = first.names - assert first.names == result.names - tm.assert_raises_regex(TypeError, "other must be a MultiIndex " - "or a list of tuples", - first.difference, [1, 2, 3, 4, 5]) - - -def test_union(_index): - piece1 = _index[:5][::-1] - piece2 = _index[3:] - - the_union = piece1 | piece2 - - tups = sorted(_index.values) - expected = MultiIndex.from_tuples(tups) - - assert the_union.equals(expected) - - # corner case, pass self or empty thing: - the_union = _index.union(_index) - assert the_union is _index - - the_union = _index.union(_index[:0]) - assert the_union is _index - - # won't work in python 3 - # tuples = _index.values - # result = _index[:4] | tuples[4:] - # assert result.equals(tuples) - - # not valid for python 3 - # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) - - # result = other.union(_index) - # assert ('foo', 'one') in result - # assert 'B' in result - - # result2 = _index.union(other) - # assert result.equals(result2) - - -def test_intersection(_index): - piece1 = _index[:5][::-1] - piece2 = _index[3:] - - the_int = piece1 & piece2 - tups = sorted(_index[3:5].values) - expected = MultiIndex.from_tuples(tups) - assert the_int.equals(expected) - - # corner case, pass self - the_int = _index.intersection(_index) - assert the_int is _index - - # empty intersection: disjoint - empty = 
_index[:2] & _index[2:] - expected = _index[:0] - assert empty.equals(expected) - - # can't do in python 3 - # tuples = _index.values - # result = _index & tuples - # assert result.equals(tuples) - - def test_insert(_index): # key contained in all levels new_index = _index.insert(0, ('bar', 'two')) @@ -203,80 +88,10 @@ def test_insert(_index): tm.assert_series_equal(left, right) -def test_is_all_dates(_index): - assert not _index.is_all_dates - - -def test_is_numeric(_index): - # MultiIndex is never numeric - assert not _index.is_numeric() - - def test_bounds(_index): _index._bounds -def test_equals_multi(_index): - assert _index.equals(_index) - assert not _index.equals(_index.values) - assert _index.equals(Index(_index.values)) - - assert _index.equal_levels(_index) - assert not _index.equals(_index[:-1]) - assert not _index.equals(_index[-1]) - - # different number of levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - assert not index.equals(index2) - assert not index.equal_levels(index2) - - # levels are different - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not _index.equals(index) - assert not _index.equal_levels(index) - - # some of the labels are different - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - assert not _index.equals(index) - - -def test_identical(_index): - mi = 
_index.copy() - mi2 = _index.copy() - assert mi.identical(mi2) - - mi = mi.set_names(['new1', 'new2']) - assert mi.equals(mi2) - assert not mi.identical(mi2) - - mi2 = mi2.set_names(['new1', 'new2']) - assert mi.identical(mi2) - - mi3 = Index(mi.tolist(), names=mi.names) - mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) - assert mi.identical(mi3) - assert not mi.identical(mi4) - assert mi.equals(mi4) - - def test_append(_index): result = _index[:3].append(_index[3:]) assert result.equals(_index) @@ -302,11 +117,6 @@ def test_groupby(_index): tm.assert_dict_equal(groups, exp) -def test_equals_operator(_index): - # GH9785 - assert (_index == _index).all() - - def test_truncate(): major_axis = Index(lrange(4)) minor_axis = Index(lrange(2)) @@ -442,36 +252,6 @@ def test_numpy_repeat(): ValueError, msg, np.repeat, m, reps, axis=1) -def test_is_(): - mi = MultiIndex.from_tuples(lzip(range(10), range(10))) - assert mi.is_(mi) - assert mi.is_(mi.view()) - assert mi.is_(mi.view().view().view().view()) - mi2 = mi.view() - # names are metadata, they don't change id - mi2.names = ["A", "B"] - assert mi2.is_(mi) - assert mi.is_(mi2) - - assert mi.is_(mi.set_names(["C", "D"])) - mi2 = mi.view() - mi2.set_names(["E", "F"], inplace=True) - assert mi.is_(mi2) - # levels are inherent properties, they change identity - mi3 = mi2.set_levels([lrange(10), lrange(10)]) - assert not mi3.is_(mi2) - # shouldn't change - assert mi2.is_(mi) - mi4 = mi3.view() - - # GH 17464 - Remove duplicate MultiIndex levels - mi4.set_levels([lrange(10), lrange(10)], inplace=True) - assert not mi4.is_(mi3) - mi5 = mi.view() - mi5.set_levels(mi5.levels, inplace=True) - assert not mi5.is_(mi) - - def test_append_mixed_dtypes(): # GH 13660 dti = date_range('2011-01-01', freq='M', periods=3, ) @@ -507,6 +287,42 @@ def test_append_mixed_dtypes(): tm.assert_index_equal(res, exp) +def test_take(named_index): + indexer = [4, 3, 0, 2] + for k, ind in named_index.items(): + + # separate + if k in 
['boolIndex', 'tuples', 'empty']: + continue + + result = ind.take(indexer) + expected = ind[indexer] + assert result.equals(expected) + + if not isinstance(ind, + (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + with pytest.raises(AttributeError): + ind.freq + + +def test_take_invalid_kwargs(_index): + idx = _index + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, idx.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, out=indices) + + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, idx.take, + indices, mode='clip') + + def test_take_fill_value(): # GH 12631 vals = [['A', 'B'], @@ -677,68 +493,6 @@ def test_multiindex_compare(): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") -def test_isin_nan_pypy(): - idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) - tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - np.array([False, True])) - tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - np.array([False, True])) - - -def test_isin(): - values = [('foo', 2), ('bar', 3), ('quux', 4)] - - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - result = idx.isin(values) - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - # empty, return dtype bool - idx = MultiIndex.from_arrays([[], []]) - result = idx.isin(values) - assert len(result) == 0 - assert result.dtype == np.bool_ - - -@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") -def test_isin_nan_not_pypy(): - idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) - tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - np.array([False, False])) - tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - 
np.array([False, False])) - - -def test_isin_level_kwarg(): - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - - vals_0 = ['foo', 'bar', 'quux'] - vals_1 = [2, 3, 10] - - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) - - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) - - pytest.raises(IndexError, idx.isin, vals_0, level=5) - pytest.raises(IndexError, idx.isin, vals_0, level=-5) - - pytest.raises(KeyError, idx.isin, vals_0, level=1.0) - pytest.raises(KeyError, idx.isin, vals_1, level=-1.0) - pytest.raises(KeyError, idx.isin, vals_1, level='A') - - idx.names = ['A', 'B'] - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) - - pytest.raises(KeyError, idx.isin, vals_1, level='C') - - def test_duplicate_multiindex_labels(): # GH 17464 # Make sure that a MultiIndex with duplicate levels throws a ValueError @@ -898,3 +652,98 @@ def f(a): tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) + +def test_map(_index): + # callable + index = _index + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype('int64') + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index)]) +def test_map_dictlike(_index, mapper): + + index = _index + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) + + identity = mapper(index.values, index) + + # we don't infer to UInt64 for a dict + if isinstance(index, pd.UInt64Index) and 
isinstance(identity, dict): + expected = index.astype('int64') + else: + expected = index + + result = index.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + +def test_numpy_ufuncs(named_index): + # test ufuncs of numpy 1.9.2. see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + # some functions are skipped because it may return different result + # for unicode input depending on numpy version + + for name, idx in compat.iteritems(named_index): + for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + # PeriodIndex behavior should be changed in future version + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # coerces to float (e.g. 
np.sin) + with np.errstate(all='ignore'): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + + tm.assert_index_equal(result, exp) + assert isinstance(result, pd.Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + + for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(idx) == 0: + continue + else: + with pytest.raises(Exception): + func(idx) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 071c2c54196bd..13e400a7514de 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -86,3 +86,15 @@ def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ + + +def test_reindex_base(_index): + idx = _index + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with tm.assert_raises_regex(ValueError, 'Invalid fill method'): + idx.get_indexer(idx, method='invalid') + diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py new file mode 100644 index 0000000000000..598407f077adb --- /dev/null +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -0,0 +1,283 @@ +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, 
IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) + +import pandas.util.testing as tm +import numpy as np +import pandas as pd + + +def test_setops_errorcases(named_index): + for name, idx in compat.iteritems(named_index): + # # non-iterable input + cases = [0.5, 'xxx'] + methods = [idx.intersection, idx.union, idx.difference, + idx.symmetric_difference] + + for method in methods: + for case in cases: + tm.assert_raises_regex(TypeError, + "Input must be Index " + "or array-like", + method, case) + + +def test_intersection_base(named_index): + for name, idx in compat.iteritems(named_index): + first = idx[:5] + second = idx[:3] + intersect = first.intersection(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.intersection(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.intersection([1, 2, 3]) + + +def test_union_base(named_index): + for name, idx in compat.iteritems(named_index): + first = idx[3:] + second = idx[:5] + everything = idx + union = first.union(second) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.union(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + 
result = first.union(case) + assert tm.equalContents(result, everything) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.union([1, 2, 3]) + + +def test_difference_base(named_index): + for name, idx in compat.iteritems(named_index): + first = idx[2:] + second = idx[:4] + answer = idx[4:] + result = first.difference(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.difference(case) + elif isinstance(idx, CategoricalIndex): + pass + elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + assert result.__class__ == answer.__class__ + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) + else: + result = first.difference(case) + assert tm.equalContents(result, answer) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.difference([1, 2, 3]) + + +def test_symmetric_difference(named_index): + for name, idx in compat.iteritems(named_index): + first = idx[1:] + second = idx[:-1] + if isinstance(idx, CategoricalIndex): + pass + else: + answer = idx[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): + result = first.symmetric_difference(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = 
first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + first.symmetric_difference([1, 2, 3]) + + +def test_empty(_index): + # GH 15270 + index = _index + assert not index.empty + assert index[:0].empty + + + +def test_unique_na(): + idx = pd.Index([2, np.nan, 2, 1], name='my_index') + expected = pd.Index([2, np.nan, 1], name='my_index') + result = idx.unique() + tm.assert_index_equal(result, expected) + + +def test_difference(_index): + + first = _index + result = first.difference(_index[-3:]) + expected = MultiIndex.from_tuples(sorted(_index[:-3].values), + sortorder=0, + names=_index.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: reflexive + result = _index.difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: superset + result = _index[-3:].difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # empty difference: degenerate + result = _index[:0].difference(_index) + expected = _index[:0] + assert result.equals(expected) + assert result.names == _index.names + + # names not the same + chunklet = _index[-3:] + chunklet.names = ['foo', 'baz'] + result = first.difference(chunklet) + assert result.names == (None, None) + + # empty, but non-equal + result = _index.difference(_index.sortlevel(1)[0]) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([]) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([('foo', 'one')]) + expected = 
pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( + 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) + expected.names = first.names + assert first.names == result.names + tm.assert_raises_regex(TypeError, "other must be a MultiIndex " + "or a list of tuples", + first.difference, [1, 2, 3, 4, 5]) + + +def test_union(_index): + piece1 = _index[:5][::-1] + piece2 = _index[3:] + + the_union = piece1 | piece2 + + tups = sorted(_index.values) + expected = MultiIndex.from_tuples(tups) + + assert the_union.equals(expected) + + # corner case, pass self or empty thing: + the_union = _index.union(_index) + assert the_union is _index + + the_union = _index.union(_index[:0]) + assert the_union is _index + + # won't work in python 3 + # tuples = _index.values + # result = _index[:4] | tuples[4:] + # assert result.equals(tuples) + + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) + + # result = other.union(_index) + # assert ('foo', 'one') in result + # assert 'B' in result + + # result2 = _index.union(other) + # assert result.equals(result2) + + +def test_intersection(_index): + piece1 = _index[:5][::-1] + piece2 = _index[3:] + + the_int = piece1 & piece2 + tups = sorted(_index[3:5].values) + expected = MultiIndex.from_tuples(tups) + assert the_int.equals(expected) + + # corner case, pass self + the_int = _index.intersection(_index) + assert the_int is _index + + # empty intersection: disjoint + empty = _index[:2] & _index[2:] + expected = _index[:0] + assert empty.equals(expected) + + # can't do in python 3 + # tuples = _index.values + # result = _index & tuples + # assert result.equals(tuples) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 7ced55951feb7..8f8b1ece8c679 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -1,7 +1,16 @@ # -*- coding: utf-8 -*- - - -from pandas import MultiIndex 
+import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, + Int64Index, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, + isna) +from pandas._libs.tslib import iNaT +from pandas.compat import PY3 +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin def test_sortlevel(_index): @@ -54,3 +63,34 @@ def test_sortlevel_deterministic(): sorted_idx, _ = index.sortlevel(1, ascending=False) assert sorted_idx.equals(expected[::-1]) + + +def test_sort(indices): + pytest.raises(TypeError, indices.sort) + + +def test_numpy_argsort(named_index): + for k, ind in named_index.items(): + result = np.argsort(ind) + expected = ind.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(ind), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, + np.argsort, ind, axis=1) + + msg = "the 'kind' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, kind='mergesort') + + msg = "the 'order' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + ind, order=('a', 'b')) From 750b8c6001d2a0860a11c91feb6939d6528975ea Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Wed, 20 Jun 2018 18:15:29 -0500 Subject: [PATCH 3/7] Changes fixture _index to fixture idx --- pandas/tests/indexes/multi/conftest.py | 10 +- pandas/tests/indexes/multi/test_compat.py | 14 +- .../tests/indexes/multi/test_constructor.py | 32 
+-- pandas/tests/indexes/multi/test_contains.py | 8 +- pandas/tests/indexes/multi/test_conversion.py | 6 +- pandas/tests/indexes/multi/test_copy.py | 18 +- pandas/tests/indexes/multi/test_drop.py | 50 ++--- .../tests/indexes/multi/test_equivalence.py | 42 ++-- pandas/tests/indexes/multi/test_format.py | 13 +- pandas/tests/indexes/multi/test_get_set.py | 198 +++++++++--------- pandas/tests/indexes/multi/test_indexing.py | 24 +-- pandas/tests/indexes/multi/test_integrity.py | 13 +- pandas/tests/indexes/multi/test_join.py | 38 ++-- pandas/tests/indexes/multi/test_names.py | 36 ++-- pandas/tests/indexes/multi/test_operations.py | 131 ++++++------ pandas/tests/indexes/multi/test_reindex.py | 34 +-- pandas/tests/indexes/multi/test_set_ops.py | 76 ++++--- pandas/tests/indexes/multi/test_sorting.py | 4 +- 18 files changed, 365 insertions(+), 382 deletions(-) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 3bc8bf6a391d4..4e72cd046beb6 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -6,25 +6,25 @@ @pytest.fixture -def _index(): +def idx(): major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) index_names = ['first', 'second'] - idx = MultiIndex( + index = MultiIndex( levels=[major_axis, minor_axis], labels=[major_labels, minor_labels], names=index_names, verify_integrity=False ) - return idx + return index @pytest.fixture -def named_index(_index): - return {'index': _index} +def named_index(idx): + return {'index': idx} @pytest.fixture diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 511bc335a705a..6a33799b41656 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -18,9 +18,7 @@ -def test_numeric_compat(_index): - - idx = _index +def 
test_numeric_compat(idx): tm.assert_raises_regex(TypeError, "cannot perform __mul__", lambda: idx * 1) tm.assert_raises_regex(TypeError, "cannot perform __rmul__", @@ -37,19 +35,16 @@ def test_numeric_compat(_index): lambda: 1 // idx) -def test_logical_compat(_index): - idx = _index +def test_logical_compat(idx): tm.assert_raises_regex(TypeError, 'cannot perform all', lambda: idx.all()) tm.assert_raises_regex(TypeError, 'cannot perform any', lambda: idx.any()) -def test_boolean_context_compat(_index): +def test_boolean_context_compat(idx): # boolean context compat - idx = _index - def f(): if idx: pass @@ -118,8 +113,7 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(mi2.values, new_values) -def test_ndarray_compat_properties(_index, _compat_props): - idx = _index +def test_ndarray_compat_properties(idx, _compat_props): assert idx.T.equals(idx) assert idx.transpose().equals(idx) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 6e576c1c7e043..d030df4493f3b 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -52,7 +52,7 @@ def test_constructor_nonhashable_names(): tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) -def test_constructor_mismatched_label_levels(_index): +def test_constructor_mismatched_label_levels(idx): labels = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] tm.assert_raises_regex(ValueError, "Length of levels and labels " @@ -71,10 +71,10 @@ def test_constructor_mismatched_label_levels(_index): # external API with tm.assert_raises_regex(ValueError, length_error): - _index.copy().set_levels([['a'], ['b']]) + idx.copy().set_levels([['a'], ['b']]) with tm.assert_raises_regex(ValueError, label_error): - _index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) def test_copy_in_constructor(): @@ -167,14 +167,14 @@ def 
test_reconstruct_remove_unused(): assert result2.is_(result) -def test_from_arrays(_index): +def test_from_arrays(idx): arrays = [] - for lev, lab in zip(_index.levels, _index.labels): + for lev, lab in zip(idx.levels, idx.labels): arrays.append(np.asarray(lev).take(lab)) # list of arrays as input - result = MultiIndex.from_arrays(arrays, names=_index.names) - tm.assert_index_equal(result, _index) + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) # infer correctly result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], @@ -183,15 +183,15 @@ def test_from_arrays(_index): assert result.levels[1].equals(Index(['a', 'b'])) -def test_from_arrays_iterator(_index): +def test_from_arrays_iterator(idx): # GH 18434 arrays = [] - for lev, lab in zip(_index.levels, _index.labels): + for lev, lab in zip(idx.levels, idx.labels): arrays.append(np.asarray(lev).take(lab)) # iterator as input - result = MultiIndex.from_arrays(iter(arrays), names=_index.names) - tm.assert_index_equal(result, _index) + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) # invalid iterator input with tm.assert_raises_regex( @@ -376,9 +376,9 @@ def test_from_tuples_empty(): tm.assert_index_equal(result, expected) -def test_from_tuples_index_values(_index): - result = MultiIndex.from_tuples(_index) - assert (result.values == _index.values).all() +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() def test_from_product_empty(): @@ -478,11 +478,11 @@ def test_from_product_iterator(): MultiIndex.from_product(0) -def test_create_index_existing_name(_index): +def test_create_index_existing_name(idx): # GH11193, when an existing index is passed, and a new name is not # specified, the new index should inherit the previous object name - index = _index + index = idx index.names = ['foo', 'bar'] result = pd.Index(index) 
tm.assert_index_equal( diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index 44edc3f82249f..de5c27a4bd615 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -25,10 +25,10 @@ def test_contains_with_nat(): assert val in mi -def test_contains(_index): - assert ('foo', 'two') in _index - assert ('bar', 'two') not in _index - assert None not in _index +def test_contains(idx): + assert ('foo', 'two') in idx + assert ('bar', 'two') not in idx + assert None not in idx @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 318ebb660a6b6..2ec3eff92be13 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -10,9 +10,9 @@ from pandas.util.testing import assert_almost_equal -def test_tolist(_index): - result = _index.tolist() - exp = list(_index.values) +def test_tolist(idx): + result = idx.tolist() + exp = list(idx.values) assert result == exp diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 1d0f8cb6160d6..63bdac5122ac3 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -32,21 +32,21 @@ def assert_multiindex_copied(copy, original): assert copy.sortorder == original.sortorder -def test_copy(_index): - i_copy = _index.copy() +def test_copy(idx): + i_copy = idx.copy() - assert_multiindex_copied(i_copy, _index) + assert_multiindex_copied(i_copy, idx) -def test_shallow_copy(_index): - i_copy = _index._shallow_copy() +def test_shallow_copy(idx): + i_copy = idx._shallow_copy() - assert_multiindex_copied(i_copy, _index) + assert_multiindex_copied(i_copy, idx) -def test_view(_index): - i_view = _index.view() - assert_multiindex_copied(i_view, _index) +def test_view(idx): + i_view = 
idx.view() + assert_multiindex_copied(i_view, idx) def test_copy_name(named_index): diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 2e4d66d9ef993..eac2feea3103f 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -10,61 +10,61 @@ from pandas.errors import PerformanceWarning -def test_drop(_index): - dropped = _index.drop([('foo', 'two'), ('qux', 'one')]) +def test_drop(idx): + dropped = idx.drop([('foo', 'two'), ('qux', 'one')]) index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) - dropped2 = _index.drop(index) + dropped2 = idx.drop(index) - expected = _index[[0, 2, 3, 5]] + expected = idx[[0, 2, 3, 5]] tm.assert_index_equal(dropped, expected) tm.assert_index_equal(dropped2, expected) - dropped = _index.drop(['bar']) - expected = _index[[0, 1, 3, 4, 5]] + dropped = idx.drop(['bar']) + expected = idx[[0, 1, 3, 4, 5]] tm.assert_index_equal(dropped, expected) - dropped = _index.drop('foo') - expected = _index[[2, 3, 4, 5]] + dropped = idx.drop('foo') + expected = idx[[2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([('bar', 'two')]) - pytest.raises(KeyError, _index.drop, [('bar', 'two')]) - pytest.raises(KeyError, _index.drop, index) - pytest.raises(KeyError, _index.drop, ['foo', 'two']) + pytest.raises(KeyError, idx.drop, [('bar', 'two')]) + pytest.raises(KeyError, idx.drop, index) + pytest.raises(KeyError, idx.drop, ['foo', 'two']) # partially correct argument mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - pytest.raises(KeyError, _index.drop, mixed_index) + pytest.raises(KeyError, idx.drop, mixed_index) # error='ignore' - dropped = _index.drop(index, errors='ignore') - expected = _index[[0, 1, 2, 3, 4, 5]] + dropped = idx.drop(index, errors='ignore') + expected = idx[[0, 1, 2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) - dropped = _index.drop(mixed_index, errors='ignore') - expected 
= _index[[0, 1, 2, 3, 5]] + dropped = idx.drop(mixed_index, errors='ignore') + expected = idx[[0, 1, 2, 3, 5]] tm.assert_index_equal(dropped, expected) - dropped = _index.drop(['foo', 'two'], errors='ignore') - expected = _index[[2, 3, 4, 5]] + dropped = idx.drop(['foo', 'two'], errors='ignore') + expected = idx[[2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) # mixed partial / full drop - dropped = _index.drop(['foo', ('qux', 'one')]) - expected = _index[[2, 3, 5]] + dropped = idx.drop(['foo', ('qux', 'one')]) + expected = idx[[2, 3, 5]] tm.assert_index_equal(dropped, expected) # mixed partial / full drop / error='ignore' mixed_index = ['foo', ('qux', 'one'), 'two'] - pytest.raises(KeyError, _index.drop, mixed_index) - dropped = _index.drop(mixed_index, errors='ignore') - expected = _index[[2, 3, 5]] + pytest.raises(KeyError, idx.drop, mixed_index) + dropped = idx.drop(mixed_index, errors='ignore') + expected = idx[[2, 3, 5]] tm.assert_index_equal(dropped, expected) -def test_droplevel_with_names(_index): - index = _index[_index.get_loc('foo')] +def test_droplevel_with_names(idx): + index = idx[idx.get_loc('foo')] dropped = index.droplevel(0) assert dropped.name == 'second' diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 179a214d6ba21..6a3d9012b0fb6 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -40,9 +40,9 @@ def test_equals(named_index): assert not idx.equals(pd.Series(idx)) -def test_equals_op(_index): +def test_equals_op(idx): # GH9947, GH10637 - index_a = _index + index_a = idx if isinstance(index_a, PeriodIndex): return @@ -103,14 +103,14 @@ def test_equals_op(_index): tm.assert_series_equal(series_a == item, Series(expected3)) -def test_equals_multi(_index): - assert _index.equals(_index) - assert not _index.equals(_index.values) - assert _index.equals(Index(_index.values)) +def test_equals_multi(idx): + assert 
idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) - assert _index.equal_levels(_index) - assert not _index.equals(_index[:-1]) - assert not _index.equals(_index[-1]) + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) # different number of levels index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( @@ -130,8 +130,8 @@ def test_equals_multi(_index): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - assert not _index.equals(index) - assert not _index.equal_levels(index) + assert not idx.equals(index) + assert not idx.equal_levels(index) # some of the labels are different major_axis = Index(['foo', 'bar', 'baz', 'qux']) @@ -142,12 +142,12 @@ def test_equals_multi(_index): index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels]) - assert not _index.equals(index) + assert not idx.equals(index) -def test_identical(_index): - mi = _index.copy() - mi2 = _index.copy() +def test_identical(idx): + mi = idx.copy() + mi2 = idx.copy() assert mi.identical(mi2) mi = mi.set_names(['new1', 'new2']) @@ -164,9 +164,9 @@ def test_identical(_index): assert mi.equals(mi4) -def test_equals_operator(_index): +def test_equals_operator(idx): # GH9785 - assert (_index == _index).all() + assert (idx == idx).all() def test_equals_missing_values(): @@ -209,13 +209,13 @@ def test_is_(): assert not mi5.is_(mi) -def test_is_all_dates(_index): - assert not _index.is_all_dates +def test_is_all_dates(idx): + assert not idx.is_all_dates -def test_is_numeric(_index): +def test_is_numeric(idx): # MultiIndex is never numeric - assert not _index.is_numeric() + assert not idx.is_numeric() def test_nulls(named_index): diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index e06aec26b9105..21e8a199cadd9 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ 
b/pandas/tests/indexes/multi/test_format.py @@ -15,9 +15,9 @@ def test_dtype_str(indices): assert dtype == str(indices.dtype) -def test_format(_index): - _index.format() - _index[:0].format() +def test_format(idx): + idx.format() + idx[:0].format() def test_format_integer_names(): @@ -26,14 +26,14 @@ def test_format_integer_names(): index.format(names=True) -def test_format_sparse_config(_index): +def test_format_sparse_config(idx): warn_filters = warnings.filters warnings.filterwarnings('ignore', category=FutureWarning, module=".*format") # GH1538 pd.set_option('display.multi_sparse', False) - result = _index.format() + result = idx.format() assert result[1] == 'foo two' tm.reset_display_options() @@ -125,9 +125,8 @@ def test_bytestring_with_unicode(): str(idx) -def test_repr_max_seq_item_setting(_index): +def test_repr_max_seq_item_setting(idx): # GH10182 - idx = _index idx = idx.repeat(50) with pd.option_context("display.max_seq_items", None): repr(idx) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 5a270c019a4f2..df8c92632a5c5 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -16,13 +16,13 @@ isna) -def test_get_loc(_index): - assert _index.get_loc(('foo', 'two')) == 1 - assert _index.get_loc(('baz', 'two')) == 3 - pytest.raises(KeyError, _index.get_loc, ('bar', 'two')) - pytest.raises(KeyError, _index.get_loc, 'quux') +def test_get_loc(idx): + assert idx.get_loc(('foo', 'two')) == 1 + assert idx.get_loc(('baz', 'two')) == 3 + pytest.raises(KeyError, idx.get_loc, ('bar', 'two')) + pytest.raises(KeyError, idx.get_loc, 'quux') - pytest.raises(NotImplementedError, _index.get_loc, 'foo', + pytest.raises(NotImplementedError, idx.get_loc, 'foo', method='nearest') # 3 levels @@ -188,12 +188,12 @@ def test_get_indexer_nearest(): midx.get_indexer(['a'], method='pad', tolerance=2) -def test_set_name_methods(_index, index_names): +def 
test_set_name_methods(idx, index_names): # so long as these are synonyms, we don't need to test set_names - assert _index.rename == _index.set_names + assert idx.rename == idx.set_names new_names = [name + "SUFFIX" for name in index_names] - ind = _index.set_names(new_names) - assert _index.names == index_names + ind = idx.set_names(new_names) + assert idx.names == index_names assert ind.names == new_names with tm.assert_raises_regex(ValueError, "^Length"): ind.set_names(new_names + new_names) @@ -203,8 +203,8 @@ def test_set_name_methods(_index, index_names): assert ind.names == new_names2 # set names for specific level (# GH7792) - ind = _index.set_names(new_names[0], level=0) - assert _index.names == index_names + ind = idx.set_names(new_names[0], level=0) + assert idx.names == index_names assert ind.names == [new_names[0], index_names[1]] res = ind.set_names(new_names2[0], level=0, inplace=True) @@ -212,8 +212,8 @@ def test_set_name_methods(_index, index_names): assert ind.names == [new_names2[0], index_names[1]] # set names for multiple levels - ind = _index.set_names(new_names, level=[0, 1]) - assert _index.names == index_names + ind = idx.set_names(new_names, level=[0, 1]) + assert idx.names == index_names assert ind.names == new_names res = ind.set_names(new_names2, level=[0, 1], inplace=True) @@ -221,29 +221,29 @@ def test_set_name_methods(_index, index_names): assert ind.names == new_names2 -def test_set_levels_labels_directly(_index): +def test_set_levels_labels_directly(idx): # setting levels/labels directly raises AttributeError - levels = _index.levels + levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] - labels = _index.labels + labels = idx.labels major_labels, minor_labels = labels major_labels = [(x + 1) % 3 for x in major_labels] minor_labels = [(x + 1) % 1 for x in minor_labels] new_labels = [major_labels, minor_labels] with pytest.raises(AttributeError): - _index.levels = new_levels + idx.levels = new_levels 
with pytest.raises(AttributeError): - _index.labels = new_labels + idx.labels = new_labels -def test_set_levels(_index): +def test_set_levels(idx): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. - levels = _index.levels + levels = idx.levels new_levels = [[lev + 'a' for lev in level] for level in levels] def assert_matching(actual, expected, check_dtype=False): @@ -256,81 +256,81 @@ def assert_matching(actual, expected, check_dtype=False): tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) # level changing [w/o mutation] - ind2 = _index.set_levels(new_levels) + ind2 = idx.set_levels(new_levels) assert_matching(ind2.levels, new_levels) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) # level changing [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_levels(new_levels, inplace=True) assert inplace_return is None assert_matching(ind2.levels, new_levels) # level changing specific level [w/o mutation] - ind2 = _index.set_levels(new_levels[0], level=0) + ind2 = idx.set_levels(new_levels[0], level=0) assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) - ind2 = _index.set_levels(new_levels[1], level=1) + ind2 = idx.set_levels(new_levels[1], level=1) assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) # level changing multiple levels [w/o mutation] - ind2 = _index.set_levels(new_levels, level=[0, 1]) + ind2 = idx.set_levels(new_levels, level=[0, 1]) assert_matching(ind2.levels, new_levels) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) # level changing specific level [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) assert inplace_return is None 
assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) assert inplace_return is None assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) # level changing multiple levels [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) assert inplace_return is None assert_matching(ind2.levels, new_levels) - assert_matching(_index.levels, levels) + assert_matching(idx.levels, levels) # illegal level changing should not change levels # GH 13754 - original_index = _index.copy() + original_index = idx.copy() for inplace in [True, False]: with tm.assert_raises_regex(ValueError, "^On"): - _index.set_levels(['c'], level=0, inplace=inplace) - assert_matching(_index.levels, original_index.levels, + idx.set_levels(['c'], level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) with tm.assert_raises_regex(ValueError, "^On"): - _index.set_labels([0, 1, 2, 3, 4, 5], level=0, + idx.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) - assert_matching(_index.labels, original_index.labels, + assert_matching(idx.labels, original_index.labels, check_dtype=True) with tm.assert_raises_regex(TypeError, "^Levels"): - _index.set_levels('c', level=0, inplace=inplace) - assert_matching(_index.levels, original_index.levels, + idx.set_levels('c', level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) with tm.assert_raises_regex(TypeError, "^Labels"): - _index.set_labels(1, level=0, inplace=inplace) - assert_matching(_index.labels, original_index.labels, + idx.set_labels(1, level=0, inplace=inplace) + assert_matching(idx.labels, original_index.labels, check_dtype=True) 
-def test_set_labels(_index): +def test_set_labels(idx): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. - labels = _index.labels + labels = idx.labels major_labels, minor_labels = labels major_labels = [(x + 1) % 3 for x in major_labels] minor_labels = [(x + 1) % 1 for x in minor_labels] @@ -346,50 +346,50 @@ def assert_matching(actual, expected): tm.assert_numpy_array_equal(act, exp) # label changing [w/o mutation] - ind2 = _index.set_labels(new_labels) + ind2 = idx.set_labels(new_labels) assert_matching(ind2.labels, new_labels) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) # label changing [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_labels(new_labels, inplace=True) assert inplace_return is None assert_matching(ind2.labels, new_labels) # label changing specific level [w/o mutation] - ind2 = _index.set_labels(new_labels[0], level=0) + ind2 = idx.set_labels(new_labels[0], level=0) assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) - ind2 = _index.set_labels(new_labels[1], level=1) + ind2 = idx.set_labels(new_labels[1], level=1) assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) # label changing multiple levels [w/o mutation] - ind2 = _index.set_labels(new_labels, level=[0, 1]) + ind2 = idx.set_labels(new_labels, level=[0, 1]) assert_matching(ind2.labels, new_labels) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) # label changing specific level [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) assert inplace_return is None assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) - 
ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) assert inplace_return is None assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) # label changing multiple levels [w/ mutation] - ind2 = _index.copy() + ind2 = idx.copy() inplace_return = ind2.set_labels(new_labels, level=[0, 1], inplace=True) assert inplace_return is None assert_matching(ind2.labels, new_labels) - assert_matching(_index.labels, labels) + assert_matching(idx.labels, labels) # label changing for levels of different magnitude of categories ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) @@ -407,51 +407,51 @@ def assert_matching(actual, expected): assert result.equals(expected) -def test_set_levels_labels_names_bad_input(_index): - levels, labels = _index.levels, _index.labels - names = _index.names +def test_set_levels_labels_names_bad_input(idx): + levels, labels = idx.levels, idx.labels + names = idx.names with tm.assert_raises_regex(ValueError, 'Length of levels'): - _index.set_levels([levels[0]]) + idx.set_levels([levels[0]]) with tm.assert_raises_regex(ValueError, 'Length of labels'): - _index.set_labels([labels[0]]) + idx.set_labels([labels[0]]) with tm.assert_raises_regex(ValueError, 'Length of names'): - _index.set_names([names[0]]) + idx.set_names([names[0]]) # shouldn't scalar data error, instead should demand list-like with tm.assert_raises_regex(TypeError, 'list of lists-like'): - _index.set_levels(levels[0]) + idx.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like with tm.assert_raises_regex(TypeError, 'list of lists-like'): - _index.set_labels(labels[0]) + idx.set_labels(labels[0]) # shouldn't scalar data error, instead should demand list-like with tm.assert_raises_regex(TypeError, 'list-like'): - _index.set_names(names[0]) + idx.set_names(names[0]) # should have equal lengths with 
tm.assert_raises_regex(TypeError, 'list of lists-like'): - _index.set_levels(levels[0], level=[0, 1]) + idx.set_levels(levels[0], level=[0, 1]) with tm.assert_raises_regex(TypeError, 'list-like'): - _index.set_levels(levels, level=0) + idx.set_levels(levels, level=0) # should have equal lengths with tm.assert_raises_regex(TypeError, 'list of lists-like'): - _index.set_labels(labels[0], level=[0, 1]) + idx.set_labels(labels[0], level=[0, 1]) with tm.assert_raises_regex(TypeError, 'list-like'): - _index.set_labels(labels, level=0) + idx.set_labels(labels, level=0) # should have equal lengths with tm.assert_raises_regex(ValueError, 'Length of names'): - _index.set_names(names[0], level=[0, 1]) + idx.set_names(names[0], level=[0, 1]) with tm.assert_raises_regex(TypeError, 'string'): - _index.set_names(names, level=0) + idx.set_names(names, level=0) @pytest.mark.parametrize('inplace', [True, False]) @@ -505,24 +505,24 @@ def test_set_value_keeps_names(): assert df.index.names == ('Name', 'Number') -def test_get_level_number_integer(_index): - _index.names = [1, 0] - assert _index._get_level_number(1) == 0 - assert _index._get_level_number(0) == 1 - pytest.raises(IndexError, _index._get_level_number, 2) +def test_get_level_number_integer(idx): + idx.names = [1, 0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + pytest.raises(IndexError, idx._get_level_number, 2) tm.assert_raises_regex(KeyError, 'Level fourth not found', - _index._get_level_number, 'fourth') + idx._get_level_number, 'fourth') -def test_get_level_values(_index): - result = _index.get_level_values(0) +def test_get_level_values(idx): + result = idx.get_level_values(0) expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], name='first') tm.assert_index_equal(result, expected) assert result.name == 'first' - result = _index.get_level_values('first') - expected = _index.get_level_values(0) + result = idx.get_level_values('first') + expected = idx.get_level_values(0) 
tm.assert_index_equal(result, expected) # GH 10460 @@ -538,25 +538,25 @@ def test_get_level_values(_index): tm.assert_index_equal(index.get_level_values(1), exp) -def test_getitem(_index): +def test_getitem(idx): # scalar - assert _index[2] == ('bar', 'one') + assert idx[2] == ('bar', 'one') # slice - result = _index[2:5] - expected = _index[[2, 3, 4]] + result = idx[2:5] + expected = idx[[2, 3, 4]] assert result.equals(expected) # boolean - result = _index[[True, False, True, False, True, True]] - result2 = _index[np.array([True, False, True, False, True, True])] - expected = _index[[0, 2, 4, 5]] + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] assert result.equals(expected) assert result2.equals(expected) -def test_getitem_group_select(_index): - sorted_idx, _ = _index.sortlevel(0) +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) assert sorted_idx.get_loc('baz') == slice(3, 4) assert sorted_idx.get_loc('foo') == slice(0, 2) @@ -625,9 +625,9 @@ def test_get_level_values_na(): tm.assert_index_equal(result, expected) -def test_get_unique_index(_index): - idx = _index[[0, 1, 0, 1, 1, 0, 0]] - expected = _index._shallow_copy(idx[[0, 1]]) +def test_get_unique_index(idx): + idx = idx[[0, 1, 0, 1, 1, 0, 0]] + expected = idx._shallow_copy(idx[[0, 1]]) for dropna in [False, True]: result = idx._get_unique_index(dropna=dropna) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 4db3c43ceca62..41bb8b4c19aa1 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -17,8 +17,8 @@ isna) -def test_slice_locs_partial(_index): - sorted_idx, _ = _index.sortlevel(0) +def test_slice_locs_partial(idx): + sorted_idx, _ = idx.sortlevel(0) result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) assert result == (1, 5) @@ -112,21 +112,19 @@ def 
test_slice_locs_not_contained(): -def test_to_series(_index): +def test_to_series(idx): # assert that we are creating a copy of the index - idx = _index s = idx.to_series() assert s.values is not idx.values assert s.index is not idx assert s.name == idx.name -def test_to_series_with_arguments(_index): +def test_to_series_with_arguments(idx): # GH18699 # index kwarg - idx = _index s = idx.to_series(index=idx) assert s.values is not idx.values @@ -134,7 +132,7 @@ def test_to_series_with_arguments(_index): assert s.name == idx.name # name kwarg - idx = _index + idx = idx s = idx.to_series(name='__test') assert s.values is not idx.values @@ -142,10 +140,9 @@ def test_to_series_with_arguments(_index): assert s.name != idx.name -def test_shift(_index): +def test_shift(idx): # GH8083 test the base class for shift - idx = _index pytest.raises(NotImplementedError, idx.shift, 1) pytest.raises(NotImplementedError, idx.shift, 1, 2) @@ -229,15 +226,14 @@ def test_fillna(named_index): assert idx.hasnans -def test_putmask_with_wrong_mask(_index): +def test_putmask_with_wrong_mask(idx): # GH18368 - index = _index with pytest.raises(ValueError): - index.putmask(np.ones(len(index) + 1, np.bool), 1) + idx.putmask(np.ones(len(idx) + 1, np.bool), 1) with pytest.raises(ValueError): - index.putmask(np.ones(len(index) - 1, np.bool), 1) + idx.putmask(np.ones(len(idx) - 1, np.bool), 1) with pytest.raises(ValueError): - index.putmask('foo', 1) + idx.putmask('foo', 1) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 325fc8499b94c..41d50eec8fb10 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -154,12 +154,12 @@ def take_invalid_kwargs(): indices, mode='clip') -def test_isna_behavior(_index): +def test_isna_behavior(idx): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow # isna(MI) with pytest.raises(NotImplementedError): - 
pd.isna(_index) + pd.isna(idx) def test_large_multiindex_error(): @@ -213,14 +213,13 @@ def test_million_record_attribute_error(): df['a'].foo() -def test_can_hold_identifiers(_index): - idx = _index +def test_can_hold_identifiers(idx): key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is True -def test_metadata_immutable(_index): - levels, labels = _index.levels, _index.labels +def test_metadata_immutable(idx): + levels, labels = idx.levels, idx.labels # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') with tm.assert_raises_regex(TypeError, mutable_regex): @@ -233,7 +232,7 @@ def test_metadata_immutable(_index): with tm.assert_raises_regex(TypeError, mutable_regex): labels[0][0] = labels[0][0] # and for names - names = _index.names + names = idx.names with tm.assert_raises_regex(TypeError, mutable_regex): names[0] = names[0] diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 1bf8bc675f04e..03ca66ffb3e5b 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -12,24 +12,24 @@ [Index(['three', 'one', 'two']), Index(['one']), Index(['one', 'three'])]) -def test_join_level(_index, other, join_type): - join_index, lidx, ridx = other.join(_index, how=join_type, +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join(idx, how=join_type, level='second', return_indexers=True) - exp_level = other.join(_index.levels[1], how=join_type) - assert join_index.levels[0].equals(_index.levels[0]) + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) assert join_index.levels[1].equals(exp_level) # pare down levels mask = np.array( - [x[1] in exp_level for x in _index], dtype=bool) - exp_values = _index.values[mask] + [x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] 
tm.assert_numpy_array_equal(join_index.values, exp_values) if join_type in ('outer', 'inner'): join_index2, ridx2, lidx2 = \ - _index.join(other, how=join_type, level='second', + idx.join(other, how=join_type, level='second', return_indexers=True) assert join_index.equals(join_index2) @@ -38,20 +38,19 @@ def test_join_level(_index, other, join_type): tm.assert_numpy_array_equal(join_index2.values, exp_values) -def test_join_level_corner_case(_index): +def test_join_level_corner_case(idx): # some corner cases - idx = Index(['three', 'one', 'two']) - result = idx.join(_index, level='second') + index = Index(['three', 'one', 'two']) + result = index.join(idx, level='second') assert isinstance(result, MultiIndex) tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", - _index.join, _index, level=1) + idx.join, idx, level=1) -def test_join_self(_index, join_type): - res = _index - joined = res.join(res, how=join_type) - assert res is joined +def test_join_self(idx, join_type): + joined = idx.join(idx, how=join_type) + assert idx is joined def test_join_multi(): @@ -89,9 +88,8 @@ def test_join_multi(): tm.assert_numpy_array_equal(ridx, exp_ridx) -def test_join_self_unique(_index, join_type): - index = _index - if index.is_unique: - joined = index.join(index, how=join_type) - assert (index == joined).all() +def test_join_self_unique(idx, join_type): + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index a5be2b2705525..a9fbb55679173 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -30,20 +30,20 @@ def test_index_name_retained(): tm.assert_frame_equal(result, df_expected) -def test_changing_names(_index): +def test_changing_names(idx): # names should be applied to levels - level_names = [level.name for level in _index.levels] - check_level_names(_index, _index.names) + 
level_names = [level.name for level in idx.levels] + check_level_names(idx, idx.names) - view = _index.view() - copy = _index.copy() - shallow_copy = _index._shallow_copy() + view = idx.view() + copy = idx.copy() + shallow_copy = idx._shallow_copy() # changing names should change level names on object - new_names = [name + "a" for name in _index.names] - _index.names = new_names - check_level_names(_index, new_names) + new_names = [name + "a" for name in idx.names] + idx.names = new_names + check_level_names(idx, new_names) # but not on copies check_level_names(view, level_names) @@ -52,12 +52,12 @@ def test_changing_names(_index): # and copies shouldn't change original shallow_copy.names = [name + "c" for name in shallow_copy.names] - check_level_names(_index, new_names) + check_level_names(idx, new_names) -def test_take_preserve_name(_index): - taken = _index.take([3, 0, 1]) - assert taken.names == _index.names +def test_take_preserve_name(idx): + taken = idx.take([3, 0, 1]) + assert taken.names == idx.names def test_copy_names(): @@ -83,15 +83,15 @@ def test_copy_names(): assert multi_idx3.names == ['NewName1', 'NewName2'] -def test_names(_index, index_names): +def test_names(idx, index_names): # names are assigned in setup names = index_names - level_names = [level.name for level in _index.levels] + level_names = [level.name for level in idx.levels] assert names == level_names # setting bad names on existing - index = _index + index = idx tm.assert_raises_regex(ValueError, "^Length of names", setattr, index, "names", list(index.names) + ["third"]) @@ -99,8 +99,8 @@ def test_names(_index, index_names): setattr, index, "names", []) # initializing with bad names (should always be equivalent) - major_axis, minor_axis = _index.levels - major_labels, minor_labels = _index.labels + major_axis, minor_axis = idx.levels + major_labels, minor_labels = idx.labels tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, levels=[major_axis, minor_axis], 
labels=[major_labels, minor_labels], diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index c6bf17b1071c2..28a8ba3fb0dc3 100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -21,26 +21,26 @@ def check_level_names(index, names): assert [level.name for level in index.levels] == list(names) -def test_insert(_index): +def test_insert(idx): # key contained in all levels - new_index = _index.insert(0, ('bar', 'two')) - assert new_index.equal_levels(_index) + new_index = idx.insert(0, ('bar', 'two')) + assert new_index.equal_levels(idx) assert new_index[0] == ('bar', 'two') # key not contained in all levels - new_index = _index.insert(0, ('abc', 'three')) + new_index = idx.insert(0, ('abc', 'three')) - exp0 = Index(list(_index.levels[0]) + ['abc'], name='first') + exp0 = Index(list(idx.levels[0]) + ['abc'], name='first') tm.assert_index_equal(new_index.levels[0], exp0) - exp1 = Index(list(_index.levels[1]) + ['three'], name='second') + exp1 = Index(list(idx.levels[1]) + ['three'], name='second') tm.assert_index_equal(new_index.levels[1], exp1) assert new_index[0] == ('abc', 'three') # key wrong length msg = "Item must have length equal to number of levels" with tm.assert_raises_regex(ValueError, msg): - _index.insert(0, ('foo2',)) + idx.insert(0, ('foo2',)) left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], columns=['1st', '2nd', '3rd']) @@ -88,32 +88,32 @@ def test_insert(_index): tm.assert_series_equal(left, right) -def test_bounds(_index): - _index._bounds +def test_bounds(idx): + idx._bounds -def test_append(_index): - result = _index[:3].append(_index[3:]) - assert result.equals(_index) +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) - foos = [_index[:1], _index[1:3], _index[3:]] + foos = [idx[:1], idx[1:3], idx[3:]] result = foos[0].append(foos[1:]) - assert result.equals(_index) + assert result.equals(idx) # 
empty - result = _index.append([]) - assert result.equals(_index) + result = idx.append([]) + assert result.equals(idx) -def test_groupby(_index): - groups = _index.groupby(np.array([1, 1, 1, 2, 2, 2])) - labels = _index.get_values().tolist() +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.get_values().tolist() exp = {1: labels[:3], 2: labels[3:]} tm.assert_dict_equal(groups, exp) # GH5620 - groups = _index.groupby(_index) - exp = {key: [key] for key in _index} + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} tm.assert_dict_equal(groups, exp) @@ -162,34 +162,34 @@ def f(): pytest.raises(NotImplementedError, f) -def test_reorder_levels(_index): +def test_reorder_levels(idx): # this blows up tm.assert_raises_regex(IndexError, '^Too many levels', - _index.reorder_levels, [2, 1, 0]) + idx.reorder_levels, [2, 1, 0]) -def test_astype(_index): - expected = _index.copy() - actual = _index.astype('O') +def test_astype(idx): + expected = idx.copy() + actual = idx.astype('O') assert_copy(actual.levels, expected.levels) assert_copy(actual.labels, expected.labels) check_level_names(actual, expected.names) with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): - _index.astype(np.dtype(int)) + idx.astype(np.dtype(int)) @pytest.mark.parametrize('ordered', [True, False]) -def test_astype_category(_index, ordered): +def test_astype_category(idx, ordered): # GH 18630 msg = '> 1 ndim Categorical are not supported at this time' with tm.assert_raises_regex(NotImplementedError, msg): - _index.astype(CategoricalDtype(ordered=ordered)) + idx.astype(CategoricalDtype(ordered=ordered)) if ordered is False: # dtype='category' defaults to ordered=False, so only test once with tm.assert_raises_regex(NotImplementedError, msg): - _index.astype('category') + idx.astype('category') @pytest.mark.parametrize('first_type,second_type', [ @@ -306,8 +306,8 @@ def test_take(named_index): ind.freq -def test_take_invalid_kwargs(_index): - 
idx = _index +def test_take_invalid_kwargs(idx): + idx = idx indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" @@ -364,35 +364,35 @@ def test_take_fill_value(): idx.take(np.array([1, -5])) -def test_iter(_index): - result = list(_index) +def test_iter(idx): + result = list(idx) expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] assert result == expected -def test_sub(_index): +def test_sub(idx): - first = _index + first = idx # - now raises (previously was set op difference) with pytest.raises(TypeError): - first - _index[-3:] + first - idx[-3:] with pytest.raises(TypeError): - _index[-3:] - first + idx[-3:] - first with pytest.raises(TypeError): - _index[-3:] - first.tolist() + idx[-3:] - first.tolist() with pytest.raises(TypeError): - first.tolist() - _index[-3:] + first.tolist() - idx[-3:] -def test_nlevels(_index): - assert _index.nlevels == 2 +def test_nlevels(idx): + assert idx.nlevels == 2 -def test_argsort(_index): - result = _index.argsort() - expected = _index.values.argsort() +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected) @@ -456,10 +456,10 @@ def test_unique_datetimelike(): @pytest.mark.parametrize('level', [0, 'first', 1, 'second']) -def test_unique_level(_index, level): +def test_unique_level(idx, level): # GH #17896 - with level= argument - result = _index.unique(level=level) - expected = _index.get_level_values(level).unique() + result = idx.unique(level=level) + expected = idx.get_level_values(level).unique() tm.assert_index_equal(result, expected) # With already unique level @@ -540,9 +540,9 @@ def test_duplicate_meta_data(): assert idx.drop_duplicates().names == idx.names -def test_duplicates(_index): - assert not _index.has_duplicates - assert _index.append(_index).has_duplicates +def test_duplicates(idx): + assert not idx.has_duplicates + assert idx.append(idx).has_duplicates index 
= MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) @@ -653,9 +653,9 @@ def f(a): tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) -def test_map(_index): +def test_map(idx): # callable - index = _index + index = idx # we don't infer UInt64 if isinstance(index, pd.UInt64Index): @@ -670,28 +670,27 @@ def test_map(_index): @pytest.mark.parametrize( "mapper", [ - lambda values, index: {i: e for e, i in zip(values, index)}, - lambda values, index: pd.Series(values, index)]) -def test_map_dictlike(_index, mapper): + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx)]) +def test_map_dictlike(idx, mapper): - index = _index - if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): - pytest.skip("skipping tests for {}".format(type(index))) + if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(idx))) - identity = mapper(index.values, index) + identity = mapper(idx.values, idx) # we don't infer to UInt64 for a dict - if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): - expected = index.astype('int64') + if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict): + expected = idx.astype('int64') else: - expected = index + expected = idx - result = index.map(identity) + result = idx.map(identity) tm.assert_index_equal(result, expected) # empty mappable - expected = pd.Index([np.nan] * len(index)) - result = index.map(mapper(expected, index)) + expected = pd.Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 13e400a7514de..5f336bbe5d4fb 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -11,25 +11,25 @@ def check_level_names(index, names): assert 
[level.name for level in index.levels] == list(names) -def test_reindex(_index): - result, indexer = _index.reindex(list(_index[:4])) +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) assert isinstance(result, MultiIndex) - check_level_names(result, _index[:4].names) + check_level_names(result, idx[:4].names) - result, indexer = _index.reindex(list(_index)) + result, indexer = idx.reindex(list(idx)) assert isinstance(result, MultiIndex) assert indexer is None - check_level_names(result, _index.names) + check_level_names(result, idx.names) -def test_reindex_level(_index): - idx = Index(['one']) +def test_reindex_level(idx): + index = Index(['one']) - target, indexer = _index.reindex(idx, level='second') - target2, indexer2 = idx.reindex(_index, level='second') + target, indexer = idx.reindex(index, level='second') + target2, indexer2 = index.reindex(idx, level='second') - exp_index = _index.join(idx, level='second', how='right') - exp_index2 = _index.join(idx, level='second', how='left') + exp_index = idx.join(index, level='second', how='right') + exp_index2 = idx.join(index, level='second', how='left') assert target.equals(exp_index) exp_indexer = np.array([0, 2, 4]) @@ -40,17 +40,17 @@ def test_reindex_level(_index): tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) tm.assert_raises_regex(TypeError, "Fill method not supported", - _index.reindex, _index, + idx.reindex, idx, method='pad', level='second') tm.assert_raises_regex(TypeError, "Fill method not supported", - idx.reindex, idx, method='bfill', + index.reindex, index, method='bfill', level='first') -def test_reindex_preserves_names_when_target_is_list_or_ndarray(_index): +def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): # GH6552 - idx = _index.copy() + idx = idx.copy() target = idx.copy() idx.names = target.names = [None, None] @@ -88,8 +88,8 @@ def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): assert idx.reindex([], 
level=1)[0].levels[1].dtype.type == np.object_ -def test_reindex_base(_index): - idx = _index +def test_reindex_base(idx): + idx = idx expected = np.arange(idx.size, dtype=np.intp) actual = idx.get_indexer(idx) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 598407f077adb..c0b2f44fbdafc 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -149,12 +149,10 @@ def test_symmetric_difference(named_index): first.symmetric_difference([1, 2, 3]) -def test_empty(_index): +def test_empty(idx): # GH 15270 - index = _index - assert not index.empty - assert index[:0].empty - + assert not idx.empty + assert idx[:0].empty def test_unique_na(): @@ -164,44 +162,44 @@ def test_unique_na(): tm.assert_index_equal(result, expected) -def test_difference(_index): +def test_difference(idx): - first = _index - result = first.difference(_index[-3:]) - expected = MultiIndex.from_tuples(sorted(_index[:-3].values), + first = idx + result = first.difference(idx[-3:]) + expected = MultiIndex.from_tuples(sorted(idx[:-3].values), sortorder=0, - names=_index.names) + names=idx.names) assert isinstance(result, MultiIndex) assert result.equals(expected) - assert result.names == _index.names + assert result.names == idx.names # empty difference: reflexive - result = _index.difference(_index) - expected = _index[:0] + result = idx.difference(idx) + expected = idx[:0] assert result.equals(expected) - assert result.names == _index.names + assert result.names == idx.names # empty difference: superset - result = _index[-3:].difference(_index) - expected = _index[:0] + result = idx[-3:].difference(idx) + expected = idx[:0] assert result.equals(expected) - assert result.names == _index.names + assert result.names == idx.names # empty difference: degenerate - result = _index[:0].difference(_index) - expected = _index[:0] + result = idx[:0].difference(idx) + expected = idx[:0] assert 
result.equals(expected) - assert result.names == _index.names + assert result.names == idx.names # names not the same - chunklet = _index[-3:] + chunklet = idx[-3:] chunklet.names = ['foo', 'baz'] result = first.difference(chunklet) assert result.names == (None, None) # empty, but non-equal - result = _index.difference(_index.sortlevel(1)[0]) + result = idx.difference(idx.sortlevel(1)[0]) assert len(result) == 0 # raise Exception called with non-MultiIndex @@ -224,23 +222,23 @@ def test_difference(_index): first.difference, [1, 2, 3, 4, 5]) -def test_union(_index): - piece1 = _index[:5][::-1] - piece2 = _index[3:] +def test_union(idx): + piece1 = idx[:5][::-1] + piece2 = idx[3:] the_union = piece1 | piece2 - tups = sorted(_index.values) + tups = sorted(idx.values) expected = MultiIndex.from_tuples(tups) assert the_union.equals(expected) # corner case, pass self or empty thing: - the_union = _index.union(_index) - assert the_union is _index + the_union = idx.union(idx) + assert the_union is idx - the_union = _index.union(_index[:0]) - assert the_union is _index + the_union = idx.union(idx[:0]) + assert the_union is idx # won't work in python 3 # tuples = _index.values @@ -251,7 +249,7 @@ def test_union(_index): # def test_union_with_regular_index(self): # other = Index(['A', 'B', 'C']) - # result = other.union(_index) + # result = other.union(idx) # assert ('foo', 'one') in result # assert 'B' in result @@ -259,22 +257,22 @@ def test_union(_index): # assert result.equals(result2) -def test_intersection(_index): - piece1 = _index[:5][::-1] - piece2 = _index[3:] +def test_intersection(idx): + piece1 = idx[:5][::-1] + piece2 = idx[3:] the_int = piece1 & piece2 - tups = sorted(_index[3:5].values) + tups = sorted(idx[3:5].values) expected = MultiIndex.from_tuples(tups) assert the_int.equals(expected) # corner case, pass self - the_int = _index.intersection(_index) - assert the_int is _index + the_int = idx.intersection(idx) + assert the_int is idx # empty intersection: 
disjoint - empty = _index[:2] & _index[2:] - expected = _index[:0] + empty = idx[:2] & idx[2:] + expected = idx[:0] assert empty.equals(expected) # can't do in python 3 diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 8f8b1ece8c679..54f01863f4561 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -13,10 +13,10 @@ from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -def test_sortlevel(_index): +def test_sortlevel(idx): import random - tuples = list(_index) + tuples = list(idx) random.shuffle(tuples) index = MultiIndex.from_tuples(tuples) From 2fb312bf7f021a89fbb30b72b903502bcb9cf3c3 Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Wed, 20 Jun 2018 18:23:56 -0500 Subject: [PATCH 4/7] linting after fixes --- pandas/tests/indexes/multi/test_compat.py | 15 ++-------- .../tests/indexes/multi/test_constructor.py | 9 +++--- pandas/tests/indexes/multi/test_contains.py | 9 +++--- pandas/tests/indexes/multi/test_copy.py | 13 ++------- .../tests/indexes/multi/test_equivalence.py | 16 ++--------- pandas/tests/indexes/multi/test_get_set.py | 13 +++------ pandas/tests/indexes/multi/test_indexing.py | 12 ++------ pandas/tests/indexes/multi/test_integrity.py | 9 +++--- pandas/tests/indexes/multi/test_join.py | 3 +- pandas/tests/indexes/multi/test_monotonic.py | 8 ++---- pandas/tests/indexes/multi/test_operations.py | 28 +++++++++---------- pandas/tests/indexes/multi/test_reindex.py | 1 - pandas/tests/indexes/multi/test_set_ops.py | 10 +++---- pandas/tests/indexes/multi/test_sorting.py | 10 +------ 14 files changed, 52 insertions(+), 104 deletions(-) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index 6a33799b41656..d6f1a4c41bf10 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -2,20 +2,10 @@ import numpy as np -import pandas as pd import 
pandas.util.testing as tm import pytest -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) -from pandas._libs.tslib import iNaT -from pandas.compat import PY3 -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.compat import PY3, PYPY, lrange, lzip, range, u, long -import numpy as np - +from pandas import MultiIndex +from pandas.compat import PY3, long def test_numeric_compat(idx): @@ -129,6 +119,7 @@ def test_ndarray_compat_properties(idx, _compat_props): def test_compat(indices): assert indices.tolist() == list(indices) + def test_pickle_compat_construction(_holder): # this is testing for pickle compat if _holder is None: diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index d030df4493f3b..94daaed794254 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -487,18 +487,19 @@ def test_create_index_existing_name(idx): result = pd.Index(index) tm.assert_index_equal( result, Index(Index([('foo', 'one'), ('foo', 'two'), - ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], dtype='object'), - names=['foo', 'bar'])) + names=['foo', 'bar'])) result = pd.Index(index, names=['A', 'B']) tm.assert_index_equal( result, Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], dtype='object'), names=['A', 'B'])) + def test_tuples_with_name_string(): # GH 15110 and GH 14848 diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index de5c27a4bd615..d62c0ff0d876a 100644 --- 
a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- -import pandas as pd -from pandas import MultiIndex -import pytest -from pandas.compat import PY3, PYPY, lrange, lzip, range, u import numpy as np +import pandas as pd import pandas.util.testing as tm +import pytest +from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index +from pandas.compat import PYPY +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin, iNaT def test_contains_top_level(): diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 63bdac5122ac3..2b4b09a5e4c9a 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -1,18 +1,9 @@ # -*- coding: utf-8 -*- -import numpy as np -import pandas as pd import pandas.util.testing as tm -import pytest -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) -from pandas._libs.tslib import iNaT -from pandas.compat import PY3 -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas import (CategoricalIndex, IntervalIndex, MultiIndex, PeriodIndex, + RangeIndex, Series, compat) def assert_multiindex_copied(copy, original): diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 6a3d9012b0fb6..26a7a32d72f54 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -1,22 +1,12 @@ # -*- coding: utf-8 -*- -import warnings -from itertools import product import numpy as np import pandas as pd import pandas.util.testing as tm -import pytest -from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, - Index, Int64Index, 
IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - date_range, isna, period_range) -from pandas._libs.tslib import iNaT -from pandas.compat import PY3, PYPY, lrange, lzip, range, u -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.util.testing import assert_copy +from pandas import (Index, MultiIndex, PeriodIndex, RangeIndex, Series, compat, + isna) +from pandas.compat import lrange, lzip, range def test_equals(named_index): diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index df8c92632a5c5..34609793d6c49 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -5,15 +5,10 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import CategoricalIndex, Index, MultiIndex -from pandas.compat import range -from pandas.compat import PY3, PYPY, lrange, lzip, range, u -from pandas.util.testing import assert_almost_equal +from pandas import CategoricalIndex, Index, IntervalIndex, MultiIndex +from pandas.compat import lrange, range from pandas.core.indexes.base import InvalidIndexError -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) +from pandas.util.testing import assert_almost_equal def test_get_loc(idx): @@ -312,7 +307,7 @@ def assert_matching(actual, expected, check_dtype=False): with tm.assert_raises_regex(ValueError, "^On"): idx.set_labels([0, 1, 2, 3, 4, 5], level=0, - inplace=inplace) + inplace=inplace) assert_matching(idx.labels, original_index.labels, check_dtype=True) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 
41bb8b4c19aa1..df6ad550c4ad8 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -4,17 +4,12 @@ from datetime import timedelta import numpy as np -import pandas as pd import pandas.util.testing as tm import pytest -from pandas import Index, MultiIndex +from pandas import (Index, Int64Index, MultiIndex, PeriodIndex, RangeIndex, + UInt64Index, compat) from pandas.compat import lrange -from pandas.core.indexes.base import InvalidIndexError -from pandas.util.testing import assert_almost_equal -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin, iNaT def test_slice_locs_partial(idx): @@ -111,7 +106,6 @@ def test_slice_locs_not_contained(): assert result == (0, len(index)) - def test_to_series(idx): # assert that we are creating a copy of the index diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 41d50eec8fb10..90106a29240a6 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -6,11 +6,9 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, - Index, Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - date_range, isna) -from pandas.compat import long, lrange, range +from pandas import (DataFrame, IntervalIndex, MultiIndex, + RangeIndex, compat, date_range) +from pandas.compat import lrange, range from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.errors import PerformanceWarning, UnsortedIndexError @@ -409,6 +407,7 @@ def test_unsortedindex_doc_examples(): assert dfm.index.is_lexsorted() assert 
dfm.index.lexsort_depth == 2 + def test_hash_error(indices): index = indices tm.assert_raises_regex(TypeError, "unhashable type: %r" % diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 03ca66ffb3e5b..4a386c6e8dbe4 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -30,7 +30,7 @@ def test_join_level(idx, other, join_type): if join_type in ('outer', 'inner'): join_index2, ridx2, lidx2 = \ idx.join(other, how=join_type, level='second', - return_indexers=True) + return_indexers=True) assert join_index.equals(join_index2) tm.assert_numpy_array_equal(lidx, lidx2) @@ -92,4 +92,3 @@ def test_join_self_unique(idx, join_type): if idx.is_unique: joined = idx.join(idx, how=join_type) assert (idx == joined).all() - diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index 7f65e13d4dd14..f02447e27ab81 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -1,13 +1,9 @@ # -*- coding: utf-8 -*- -import pytest - import numpy as np import pandas as pd -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) +import pytest +from pandas import Index, IntervalIndex, MultiIndex def test_is_monotonic_increasing(): diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index 28a8ba3fb0dc3..7ffe5e3fa8b69 100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -7,11 +7,10 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import (CategoricalIndex, DataFrame, DatetimeIndex, Float64Index, - Index, Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - 
date_range, isna, period_range) -from pandas.compat import PYPY, lrange, lzip, range, u +from pandas import (DataFrame, DatetimeIndex, Float64Index, Index, Int64Index, + MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index, + compat, date_range, period_range) +from pandas.compat import lrange, range, u from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.util.testing import assert_copy @@ -480,17 +479,17 @@ def test_multiindex_compare(): # Ensure comparison operations for MultiIndex with nlevels == 1 # behave consistently with those for MultiIndex with nlevels > 1 - midx = pd.MultiIndex.from_product([[0, 1]]) + midx = pd.MultiIndex.from_product([[0, 1]]) - # Equality self-test: MultiIndex object vs self - expected = pd.Series([True, True]) - result = pd.Series(midx == midx) - tm.assert_series_equal(result, expected) + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) - # Greater than comparison: MultiIndex object vs self - expected = pd.Series([False, False]) - result = pd.Series(midx > midx) - tm.assert_series_equal(result, expected) + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) def test_duplicate_multiindex_labels(): @@ -653,6 +652,7 @@ def f(a): tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) + def test_map(idx): # callable index = idx diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 5f336bbe5d4fb..346b23fed7075 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -97,4 +97,3 @@ def test_reindex_base(idx): with tm.assert_raises_regex(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') 
- diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index c0b2f44fbdafc..8949ce5ea4fc6 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -1,11 +1,11 @@ -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) +# -*- coding: utf-8 -*- + -import pandas.util.testing as tm import numpy as np import pandas as pd +import pandas.util.testing as tm +from pandas import (CategoricalIndex, DatetimeIndex, MultiIndex, PeriodIndex, + Series, TimedeltaIndex, compat) def test_setops_errorcases(named_index): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 54f01863f4561..2ac3a369ab1d2 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -1,16 +1,8 @@ # -*- coding: utf-8 -*- import numpy as np -import pandas as pd import pandas.util.testing as tm import pytest -from pandas import (CategoricalIndex, DatetimeIndex, Float64Index, Index, - Int64Index, IntervalIndex, MultiIndex, PeriodIndex, - RangeIndex, Series, TimedeltaIndex, UInt64Index, compat, - isna) -from pandas._libs.tslib import iNaT -from pandas.compat import PY3 -from pandas.core.indexes.base import InvalidIndexError -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin +from pandas import CategoricalIndex, MultiIndex, RangeIndex def test_sortlevel(idx): From e238b94e46b846873d6f9c8737504a9ae18a8a66 Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Sat, 23 Jun 2018 09:56:54 -0500 Subject: [PATCH 5/7] moved tests according to feedback test_nlevels -> test_integrity.py unsort tests -> test_sorting.py duplicates and unique test -> test_unqi_dups.py --- .../tests/indexes/multi/test_equivalence.py | 18 ++ pandas/tests/indexes/multi/test_integrity.py | 52 +--- 
pandas/tests/indexes/multi/test_operations.py | 252 +----------------- pandas/tests/indexes/multi/test_sorting.py | 52 +++- pandas/tests/indexes/multi/test_uniq_dups.py | 236 ++++++++++++++++ 5 files changed, 310 insertions(+), 300 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_uniq_dups.py diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 26a7a32d72f54..27ad284ff78f0 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -232,3 +232,21 @@ def test_nulls(named_index): result = isna(index) tm.assert_numpy_array_equal(index.isna(), result) tm.assert_numpy_array_equal(index.notna(), ~result) + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = pd.MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 90106a29240a6..af93641c0c294 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -10,7 +10,6 @@ RangeIndex, compat, date_range) from pandas.compat import lrange, range from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike -from pandas.errors import PerformanceWarning, UnsortedIndexError def test_labels_dtypes(): @@ -361,53 +360,6 @@ def test_rangeindex_fallback_coercion_bug(): tm.assert_index_equal(result, expected) -def test_unsortedindex(): - # GH 11897 - mi = 
pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), - ('x', 'b'), ('y', 'a'), ('z', 'b')], - names=['one', 'two']) - df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, - columns=['one', 'two']) - - # GH 16734: not sorted, but no real slicing - result = df.loc(axis=0)['z', 'a'] - expected = df.iloc[0] - tm.assert_series_equal(result, expected) - - with pytest.raises(UnsortedIndexError): - df.loc(axis=0)['z', slice('a')] - df.sort_index(inplace=True) - assert len(df.loc(axis=0)['z', :]) == 2 - - with pytest.raises(KeyError): - df.loc(axis=0)['q', :] - - -def test_unsortedindex_doc_examples(): - # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa - dfm = DataFrame({'jim': [0, 0, 1, 1], - 'joe': ['x', 'x', 'z', 'y'], - 'jolie': np.random.rand(4)}) - - dfm = dfm.set_index(['jim', 'joe']) - with tm.assert_produces_warning(PerformanceWarning): - dfm.loc[(1, 'z')] - - with pytest.raises(UnsortedIndexError): - dfm.loc[(0, 'y'):(1, 'z')] - - assert not dfm.index.is_lexsorted() - assert dfm.index.lexsort_depth == 1 - - # sort it - dfm = dfm.sort_index() - dfm.loc[(1, 'z')] - dfm.loc[(0, 'y'):(1, 'z')] - - assert dfm.index.is_lexsorted() - assert dfm.index.lexsort_depth == 2 - - def test_hash_error(indices): index = indices tm.assert_raises_regex(TypeError, "unhashable type: %r" % @@ -446,3 +398,7 @@ def test_memory_usage(named_index): # we report 0 for no-length assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index 7ffe5e3fa8b69..14919c31e3bff 100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -import warnings -from itertools import product - import numpy as np import pandas as pd import pandas.util.testing as tm @@ -10,7 +7,7 @@ from pandas import (DataFrame, DatetimeIndex, Float64Index, 
Index, Int64Index, MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index, compat, date_range, period_range) -from pandas.compat import lrange, range, u +from pandas.compat import lrange, range from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.util.testing import assert_copy @@ -385,10 +382,6 @@ def test_sub(idx): first.tolist() - idx[-3:] -def test_nlevels(idx): - assert idx.nlevels == 2 - - def test_argsort(idx): result = idx.argsort() expected = idx.values.argsort() @@ -410,249 +403,6 @@ def test_remove_unused_nan(level0, level1): assert('unused' not in result.levels[level]) -@pytest.mark.parametrize('names', [None, ['first', 'second']]) -def test_unique(names): - mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], - names=names) - - res = mi.unique() - exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], - names=names) - res = mi.unique() - exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], - names=mi.names) - tm.assert_index_equal(res, exp) - - mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], - names=names) - res = mi.unique() - exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) - tm.assert_index_equal(res, exp) - - # GH #20568 - empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=names) - res = mi.unique() - tm.assert_index_equal(mi, res) - - -def test_unique_datetimelike(): - idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', - '2015-01-01', 'NaT', 'NaT']) - idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', - '2015-01-02', 'NaT', '2015-01-01'], - tz='Asia/Tokyo') - result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() - - eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) - eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', - 'NaT', '2015-01-01'], - 
tz='Asia/Tokyo') - exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) - tm.assert_index_equal(result, exp) - - -@pytest.mark.parametrize('level', [0, 'first', 1, 'second']) -def test_unique_level(idx, level): - # GH #17896 - with level= argument - result = idx.unique(level=level) - expected = idx.get_level_values(level).unique() - tm.assert_index_equal(result, expected) - - # With already unique level - mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], - names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - tm.assert_index_equal(result, expected) - - # With empty MI - mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) - result = mi.unique(level=level) - expected = mi.get_level_values(level) - - -def test_multiindex_compare(): - # GH 21149 - # Ensure comparison operations for MultiIndex with nlevels == 1 - # behave consistently with those for MultiIndex with nlevels > 1 - - midx = pd.MultiIndex.from_product([[0, 1]]) - - # Equality self-test: MultiIndex object vs self - expected = pd.Series([True, True]) - result = pd.Series(midx == midx) - tm.assert_series_equal(result, expected) - - # Greater than comparison: MultiIndex object vs self - expected = pd.Series([False, False]) - result = pd.Series(midx > midx) - tm.assert_series_equal(result, expected) - - -def test_duplicate_multiindex_labels(): - # GH 17464 - # Make sure that a MultiIndex with duplicate levels throws a ValueError - with pytest.raises(ValueError): - ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) - - # And that using set_levels with duplicate levels fails - ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], - [1, 2, 1, 2, 3]]) - with pytest.raises(ValueError): - ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], - inplace=True) - - -@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], - ['1', 'a', '1']]) -def test_duplicate_level_names(names): - # GH18872 - 
pytest.raises(ValueError, pd.MultiIndex.from_product, - [[0, 1]] * 3, names=names) - - # With .rename() - mi = pd.MultiIndex.from_product([[0, 1]] * 3) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names) - - # With .rename(., level=) - mi.rename(names[0], level=1, inplace=True) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names[:2], level=[0, 2]) - - -def test_duplicate_meta_data(): - # GH 10115 - index = MultiIndex( - levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) - - for idx in [index, - index.set_names([None, None]), - index.set_names([None, 'Num']), - index.set_names(['Upper', 'Num']), ]: - assert idx.has_duplicates - assert idx.drop_duplicates().names == idx.names - - -def test_duplicates(idx): - assert not idx.has_duplicates - assert idx.append(idx).has_duplicates - - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ - [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - assert index.has_duplicates - - # GH 9075 - t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), - (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), - (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), - (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), - (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), - (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), - (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), - (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), - (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), - (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), - (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), - (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), - (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), - (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), - (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), - 
(u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), - (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), - (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] - - index = pd.MultiIndex.from_tuples(t) - assert not index.has_duplicates - - # handle int64 overflow if possible - def check(nlevels, with_nulls): - labels = np.tile(np.arange(500), 2) - level = np.arange(500) - - if with_nulls: # inject some null values - labels[500] = -1 # common nan value - labels = [labels.copy() for i in range(nlevels)] - for i in range(nlevels): - labels[i][500 + i - nlevels // 2] = -1 - - labels += [np.array([-1, 1]).repeat(500)] - else: - labels = [labels] * nlevels + [np.arange(2).repeat(500)] - - levels = [level] * nlevels + [[0, 1]] - - # no dups - index = MultiIndex(levels=levels, labels=labels) - assert not index.has_duplicates - - # with a dup - if with_nulls: - def f(a): - return np.insert(a, 1000, a[0]) - labels = list(map(f, labels)) - index = MultiIndex(levels=levels, labels=labels) - else: - values = index.values.tolist() - index = MultiIndex.from_tuples(values + [values[0]]) - - assert index.has_duplicates - - # no overflow - check(4, False) - check(4, True) - - # overflow possible - check(8, False) - check(8, True) - - # GH 9125 - n, k = 200, 5000 - levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - labels = [np.random.choice(n, k * n) for lev in levels] - mi = MultiIndex(levels=levels, labels=labels) - - for keep in ['first', 'last', False]: - left = mi.duplicated(keep=keep) - right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) - tm.assert_numpy_array_equal(left, right) - - # GH5873 - for a in [101, 102]: - mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - 2, 
dtype='bool')) - - for n in range(1, 6): # 1st level shape - for m in range(1, 5): # 2nd level shape - # all possible unique combinations, including nan - lab = product(range(-1, n), range(-1, m)) - mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], - labels=np.random.permutation(list(lab)).T) - assert len(mi) == (n + 1) * (m + 1) - assert not mi.has_duplicates - - with warnings.catch_warnings(record=True): - # Deprecated - see GH20239 - assert mi.get_duplicates().equals(MultiIndex.from_arrays( - [[], []])) - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - len(mi), dtype='bool')) - - def test_map(idx): # callable index = idx diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 2ac3a369ab1d2..d0782582d70b5 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -1,8 +1,11 @@ # -*- coding: utf-8 -*- import numpy as np +import pandas as pd import pandas.util.testing as tm import pytest -from pandas import CategoricalIndex, MultiIndex, RangeIndex +from pandas import CategoricalIndex, DataFrame, MultiIndex, RangeIndex +from pandas.compat import lrange +from pandas.errors import PerformanceWarning, UnsortedIndexError def test_sortlevel(idx): @@ -86,3 +89,50 @@ def test_numpy_argsort(named_index): msg = "the 'order' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.argsort, ind, order=('a', 'b')) + + +def test_unsortedindex(): + # GH 11897 + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi, + columns=['one', 'two']) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)['z', 'a'] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + with pytest.raises(UnsortedIndexError): + df.loc(axis=0)['z', slice('a')] + df.sort_index(inplace=True) + assert 
len(df.loc(axis=0)['z', :]) == 2 + + with pytest.raises(KeyError): + df.loc(axis=0)['q', :] + + +def test_unsortedindex_doc_examples(): + # http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + + dfm = dfm.set_index(['jim', 'joe']) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, 'z')] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, 'y'):(1, 'z')] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, 'z')] + dfm.loc[(0, 'y'):(1, 'z')] + + assert dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 2 diff --git a/pandas/tests/indexes/multi/test_uniq_dups.py b/pandas/tests/indexes/multi/test_uniq_dups.py new file mode 100644 index 0000000000000..eed8094db7c51 --- /dev/null +++ b/pandas/tests/indexes/multi/test_uniq_dups.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- + +import warnings +from itertools import product + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import MultiIndex +from pandas.compat import range, u + + +@pytest.mark.parametrize('names', [None, ['first', 'second']]) +def test_unique(names): + mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], + names=names) + + res = mi.unique() + exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')], + names=mi.names) + tm.assert_index_equal(res, exp) + + mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')], + names=names) + res = mi.unique() + exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names) + tm.assert_index_equal(res, exp) + + # GH #20568 - empty MI + mi = 
pd.MultiIndex.from_arrays([[], []], names=names) + res = mi.unique() + tm.assert_index_equal(mi, res) + + +def test_unique_datetimelike(): + idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01', + '2015-01-01', 'NaT', 'NaT']) + idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02', + '2015-01-02', 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + result = pd.MultiIndex.from_arrays([idx1, idx2]).unique() + + eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT']) + eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02', + 'NaT', '2015-01-01'], + tz='Asia/Tokyo') + exp = pd.MultiIndex.from_arrays([eidx1, eidx2]) + tm.assert_index_equal(result, exp) + + +@pytest.mark.parametrize('level', [0, 'first', 1, 'second']) +def test_unique_level(idx, level): + # GH #17896 - with level= argument + result = idx.unique(level=level) + expected = idx.get_level_values(level).unique() + tm.assert_index_equal(result, expected) + + # With already unique level + mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], + names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + tm.assert_index_equal(result, expected) + + # With empty MI + mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second']) + result = mi.unique(level=level) + expected = mi.get_level_values(level) + + +def test_duplicate_multiindex_labels(): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + with pytest.raises(ValueError): + ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + + # And that using set_levels with duplicate levels fails + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], + [1, 2, 1, 2, 3]]) + with pytest.raises(ValueError): + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) + + +@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], + ['1', 'a', '1']]) +def test_duplicate_level_names(names): + # GH18872 + 
pytest.raises(ValueError, pd.MultiIndex.from_product, + [[0, 1]] * 3, names=names) + + # With .rename() + mi = pd.MultiIndex.from_product([[0, 1]] * 3) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names) + + # With .rename(., level=) + mi.rename(names[0], level=1, inplace=True) + tm.assert_raises_regex(ValueError, "Duplicated level name:", + mi.rename, names[:2], level=[0, 2]) + + +def test_duplicate_meta_data(): + # GH 10115 + index = MultiIndex( + levels=[[0, 1], [0, 1, 2]], + labels=[[0, 0, 0, 0, 1, 1, 1], + [0, 1, 2, 0, 0, 1, 2]]) + + for idx in [index, + index.set_names([None, None]), + index.set_names([None, 'Num']), + index.set_names(['Upper', 'Num']), ]: + assert idx.has_duplicates + assert idx.drop_duplicates().names == idx.names + + +def test_duplicates(idx): + assert not idx.has_duplicates + assert idx.append(idx).has_duplicates + + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) + assert index.has_duplicates + + # GH 9075 + t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), + (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), + (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), + (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), + (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), + (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), + (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), + (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), + (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), + (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), + (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), + (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), + (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), + (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), + (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), + 
(u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), + (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), + (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] + + index = pd.MultiIndex.from_tuples(t) + assert not index.has_duplicates + + # handle int64 overflow if possible + def check(nlevels, with_nulls): + labels = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject some null values + labels[500] = -1 # common nan value + labels = [labels.copy() for i in range(nlevels)] + for i in range(nlevels): + labels[i][500 + i - nlevels // 2] = -1 + + labels += [np.array([-1, 1]).repeat(500)] + else: + labels = [labels] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + index = MultiIndex(levels=levels, labels=labels) + assert not index.has_duplicates + + # with a dup + if with_nulls: + def f(a): + return np.insert(a, 1000, a[0]) + labels = list(map(f, labels)) + index = MultiIndex(levels=levels, labels=labels) + else: + values = index.values.tolist() + index = MultiIndex.from_tuples(values + [values[0]]) + + assert index.has_duplicates + + # no overflow + check(4, False) + check(4, True) + + # overflow possible + check(8, False) + check(8, True) + + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + labels = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, labels=labels) + + for keep in ['first', 'last', False]: + left = mi.duplicated(keep=keep) + right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep) + tm.assert_numpy_array_equal(left, right) + + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + 2, 
dtype='bool')) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + lab = product(range(-1, n), range(-1, m)) + mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], + labels=np.random.permutation(list(lab)).T) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + with warnings.catch_warnings(record=True): + # Deprecated - see GH20239 + assert mi.get_duplicates().equals(MultiIndex.from_arrays( + [[], []])) + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + len(mi), dtype='bool')) From 7a3753f2a4e1c20c2ed059ac8c6a5fcb0697d106 Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Sat, 23 Jun 2018 11:35:55 -0500 Subject: [PATCH 6/7] removing the named_index fixture --- pandas/tests/indexes/multi/conftest.py | 5 - pandas/tests/indexes/multi/test_contains.py | 69 +++-- pandas/tests/indexes/multi/test_copy.py | 11 +- .../tests/indexes/multi/test_equivalence.py | 11 +- pandas/tests/indexes/multi/test_get_set.py | 29 +- pandas/tests/indexes/multi/test_indexing.py | 54 ++-- pandas/tests/indexes/multi/test_integrity.py | 33 ++- pandas/tests/indexes/multi/test_operations.py | 103 ++++---- pandas/tests/indexes/multi/test_set_ops.py | 249 +++++++++--------- pandas/tests/indexes/multi/test_sorting.py | 49 ++-- 10 files changed, 292 insertions(+), 321 deletions(-) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 4e72cd046beb6..47dc62a047be6 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -22,11 +22,6 @@ def idx(): return index -@pytest.fixture -def named_index(idx): - return {'index': idx} - - @pytest.fixture def index_names(): return ['first', 'second'] diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index d62c0ff0d876a..2c407483211e6 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ 
b/pandas/tests/indexes/multi/test_contains.py @@ -4,9 +4,8 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index +from pandas import MultiIndex from pandas.compat import PYPY -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin, iNaT def test_contains_top_level(): @@ -94,37 +93,37 @@ def test_isin_level_kwarg(): pytest.raises(KeyError, idx.isin, vals_1, level='C') -def test_hasnans_isnans(named_index): +def test_hasnans_isnans(idx): # GH 11343, added tests for hasnans / isnans - for name, index in named_index.items(): - if isinstance(index, MultiIndex): - pass - else: - idx = index.copy() - - # cases in indices doesn't include NaN - expected = np.array([False] * len(idx), dtype=bool) - tm.assert_numpy_array_equal(idx._isnan, expected) - assert not idx.hasnans - - idx = index.copy() - values = idx.values - - if len(index) == 0: - continue - elif isinstance(index, DatetimeIndexOpsMixin): - values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - tm.assert_numpy_array_equal(idx._isnan, expected) - assert idx.hasnans + # TODO: remove or change test not valid for MultiIndex + if isinstance(idx, MultiIndex): + pass + # else: + # _index = idx.copy() + + # # cases in indices doesn't include NaN + # expected = np.array([False] * len(_index), dtype=bool) + # tm.assert_numpy_array_equal(_index._isnan, expected) + # assert not _index.hasnans + + # _index = idx.copy() + # values = _index.values + + # if len(idx) == 0: + # continue + # elif isinstance(idx, DatetimeIndexOpsMixin): + # values[1] = iNaT + # elif isinstance(idx, (Int64Index, UInt64Index)): + # continue + # else: + # values[1] = np.nan + + # if isinstance(idx, 
PeriodIndex): + # _index = idx.__class__(values, freq=idx.freq) + # else: + # _index = idx.__class__(values) + + # expected = np.array([False] * len(_index), dtype=bool) + # expected[1] = True + # tm.assert_numpy_array_equal(_index._isnan, expected) + # assert _index.hasnans diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 2b4b09a5e4c9a..ef40375074de5 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -40,11 +40,12 @@ def test_view(idx): assert_multiindex_copied(i_view, idx) -def test_copy_name(named_index): +def test_copy_name(idx): # gh-12309: Check that the "name" argument # passed at initialization is honored. - for name, index in compat.iteritems(named_index): + # TODO: Remove or refactor MultiIndex not tested. + for name, index in compat.iteritems({'idx': idx}): if isinstance(index, MultiIndex): continue @@ -69,10 +70,12 @@ def test_copy_name(named_index): assert s3.index.name == 'mario' -def test_ensure_copied_data(named_index): +def test_ensure_copied_data(idx): # Check the "copy" argument of each Index.__new__ is honoured # GH12309 - for name, index in compat.iteritems(named_index): + # TODO: REMOVE THIS TEST. MultiIndex is tested seperately as noted below. + + for name, index in compat.iteritems({'idx': idx}): init_kwargs = {} if isinstance(index, PeriodIndex): # Needs "freq" specification: diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 27ad284ff78f0..c6b4452cf4a2f 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -9,9 +9,9 @@ from pandas.compat import lrange, lzip, range -def test_equals(named_index): - - for name, idx in compat.iteritems(named_index): +def test_equals(idx): + # TODO: Remove or Refactor. MultiIndex not tested. 
+ for name, idx in compat.iteritems({'idx': idx}): assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) @@ -208,11 +208,12 @@ def test_is_numeric(idx): assert not idx.is_numeric() -def test_nulls(named_index): +def test_nulls(idx): # this is really a smoke test for the methods # as these are adequately tested for function elsewhere - for name, index in named_index.items(): + # TODO: Remove or Refactor. MultiIndex not Implemeted. + for name, index in [('idx', idx), ]: if len(index) == 0: tm.assert_numpy_array_equal( index.isna(), np.array([], dtype=bool)) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 34609793d6c49..4324f0dadc05d 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -630,21 +630,20 @@ def test_get_unique_index(idx): tm.assert_index_equal(result, expected) -def test_get_indexer_consistency(named_index): +def test_get_indexer_consistency(idx): # See GH 16819 - for name, index in named_index.items(): - if isinstance(index, IntervalIndex): - continue - - if index.is_unique or isinstance(index, CategoricalIndex): - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - else: - e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = index.get_indexer(index[0:2]) - - indexer, _ = index.get_indexer_non_unique(index[0:2]) + if isinstance(idx, IntervalIndex): + pass + + if idx.is_unique or isinstance(idx, CategoricalIndex): + indexer = idx.get_indexer(idx[0:2]) assert isinstance(indexer, np.ndarray) assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = idx.get_indexer(idx[0:2]) + + indexer, _ = idx.get_indexer_non_unique(idx[0:2]) + assert isinstance(indexer, 
np.ndarray) + assert indexer.dtype == np.intp diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index df6ad550c4ad8..dc00ed4abde10 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -6,8 +6,7 @@ import numpy as np import pandas.util.testing as tm import pytest -from pandas import (Index, Int64Index, MultiIndex, PeriodIndex, RangeIndex, - UInt64Index, compat) +from pandas import Index, Int64Index, MultiIndex, PeriodIndex, UInt64Index from pandas.compat import lrange from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin, iNaT @@ -141,47 +140,36 @@ def test_shift(idx): pytest.raises(NotImplementedError, idx.shift, 1, 2) -def test_insert_base(named_index): +def test_insert_base(idx): - for name, idx in compat.iteritems(named_index): - result = idx[1:4] + result = idx[1:4] - if not len(idx): - continue + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) - # test 0th element - assert idx[0:4].equals(result.insert(0, idx[0])) +def test_delete_base(idx): -def test_delete_base(named_index): + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name - for name, idx in compat.iteritems(named_index): + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name - if not len(idx): - continue + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) - if isinstance(idx, RangeIndex): - # tested in class - continue - expected = idx[1:] - result = idx.delete(0) - assert result.equals(expected) - assert result.name == expected.name - - expected = idx[:-1] - result = idx.delete(-1) - assert result.equals(expected) - assert result.name == expected.name - - with pytest.raises((IndexError, ValueError)): - # either depending on numpy version - result = idx.delete(len(idx)) 
- - -def test_fillna(named_index): +def test_fillna(idx): # GH 11343 - for name, index in named_index.items(): + + # TODO: Remove or Refactor. Not Implemented for MultiIndex + for name, index in [('idx', idx), ]: if len(index) == 0: pass elif isinstance(index, MultiIndex): diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index af93641c0c294..859c85a7bcc38 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm import pytest from pandas import (DataFrame, IntervalIndex, MultiIndex, - RangeIndex, compat, date_range) + RangeIndex, date_range) from pandas.compat import lrange, range from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -378,26 +378,25 @@ def testit(ind): tm.assert_raises_regex(ValueError, "^Length", testit, indices) -def test_memory_usage(named_index): - for name, index in compat.iteritems(named_index): - result = index.memory_usage() - if len(index): - index.get_loc(index[0]) - result2 = index.memory_usage() - result3 = index.memory_usage(deep=True) +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) - # RangeIndex, IntervalIndex - # don't have engines - if not isinstance(index, (RangeIndex, IntervalIndex)): - assert result2 > result + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result - if index.inferred_type == 'object': - assert result3 > result2 + if idx.inferred_type == 'object': + assert result3 > result2 - else: + else: - # we report 0 for no-length - assert result == 0 + # we report 0 for no-length + assert result == 0 def test_nlevels(idx): diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index 
14919c31e3bff..a787e770f65e7 100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -6,7 +6,7 @@ import pytest from pandas import (DataFrame, DatetimeIndex, Float64Index, Index, Int64Index, MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index, - compat, date_range, period_range) + date_range, period_range) from pandas.compat import lrange, range from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -283,23 +283,17 @@ def test_append_mixed_dtypes(): tm.assert_index_equal(res, exp) -def test_take(named_index): +def test_take(idx): indexer = [4, 3, 0, 2] - for k, ind in named_index.items(): + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) - # separate - if k in ['boolIndex', 'tuples', 'empty']: - continue - - result = ind.take(indexer) - expected = ind[indexer] - assert result.equals(expected) - - if not isinstance(ind, - (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # GH 10791 - with pytest.raises(AttributeError): - ind.freq + if not isinstance(idx, + (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + with pytest.raises(AttributeError): + idx.freq def test_take_invalid_kwargs(idx): @@ -444,55 +438,54 @@ def test_map_dictlike(idx, mapper): tm.assert_index_equal(result, expected) -def test_numpy_ufuncs(named_index): +def test_numpy_ufuncs(idx): # test ufuncs of numpy 1.9.2. 
see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html # some functions are skipped because it may return different result # for unicode input depending on numpy version - for name, idx in compat.iteritems(named_index): - for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, - np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, - np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, - np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, - np.rad2deg]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - # PeriodIndex behavior should be changed in future version + for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + # PeriodIndex behavior should be changed in future version + with pytest.raises(Exception): + with np.errstate(all='ignore'): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # coerces to float (e.g. np.sin) + with np.errstate(all='ignore'): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + + tm.assert_index_equal(result, exp) + assert isinstance(result, pd.Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + continue + else: with pytest.raises(Exception): with np.errstate(all='ignore'): func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # coerces to float (e.g. 
np.sin) - with np.errstate(all='ignore'): - result = func(idx) - exp = Index(func(idx.values), name=idx.name) - tm.assert_index_equal(result, exp) - assert isinstance(result, pd.Float64Index) + for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(idx) == 0: + continue else: - # raise AttributeError or TypeError - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - with np.errstate(all='ignore'): - func(idx) - - for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) with pytest.raises(Exception): func(idx) - elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): - # Results in bool array - result = func(idx) - assert isinstance(result, np.ndarray) - assert not isinstance(result, Index) - else: - if len(idx) == 0: - continue - else: - with pytest.raises(Exception): - func(idx) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 8949ce5ea4fc6..8a4c2e9f3fcbb 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -5,148 +5,143 @@ import pandas as pd import pandas.util.testing as tm from pandas import (CategoricalIndex, DatetimeIndex, MultiIndex, PeriodIndex, - Series, TimedeltaIndex, compat) + Series, TimedeltaIndex) -def test_setops_errorcases(named_index): - for name, idx in compat.iteritems(named_index): - # # non-iterable input - cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, - idx.symmetric_difference] +def test_setops_errorcases(idx): + # # non-iterable input 
+ cases = [0.5, 'xxx'] + methods = [idx.intersection, idx.union, idx.difference, + idx.symmetric_difference] - for method in methods: - for case in cases: - tm.assert_raises_regex(TypeError, - "Input must be Index " - "or array-like", - method, case) - - -def test_intersection_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[:5] - second = idx[:3] - intersect = first.intersection(second) - - if isinstance(idx, CategoricalIndex): - pass - else: - assert tm.equalContents(intersect, second) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] + for method in methods: for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.intersection(case) - elif isinstance(idx, CategoricalIndex): - pass - else: + tm.assert_raises_regex(TypeError, + "Input must be Index " + "or array-like", + method, case) + + +def test_intersection_base(idx): + first = idx[:5] + second = idx[:3] + intersect = first.intersection(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): result = first.intersection(case) - assert tm.equalContents(result, second) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.intersection([1, 2, 3]) - - -def test_union_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[3:] - second = idx[:5] - everything = idx - union = first.union(second) - assert tm.equalContents(union, everything) - - # GH 10149 - cases = [klass(second.values) - for klass in 
[np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.union(case) - elif isinstance(idx, CategoricalIndex): - pass - else: + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.intersection([1, 2, 3]) + + +def test_union_base(idx): + first = idx[3:] + second = idx[:5] + everything = idx + union = first.union(second) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): result = first.union(case) - assert tm.equalContents(result, everything) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.union([1, 2, 3]) - - -def test_difference_base(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[2:] - second = idx[:4] - answer = idx[4:] - result = first.difference(second) - - if isinstance(idx, CategoricalIndex): + elif isinstance(idx, CategoricalIndex): pass else: - assert tm.equalContents(result, answer) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.difference(case) - elif isinstance(idx, CategoricalIndex): - pass - elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - assert result.__class__ == 
answer.__class__ - tm.assert_numpy_array_equal(result.sort_values().asi8, - answer.sort_values().asi8) - else: + result = first.union(case) + assert tm.equalContents(result, everything) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.union([1, 2, 3]) + + +def test_difference_base(idx): + first = idx[2:] + second = idx[:4] + answer = idx[4:] + result = first.difference(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): result = first.difference(case) - assert tm.equalContents(result, answer) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.difference([1, 2, 3]) - - -def test_symmetric_difference(named_index): - for name, idx in compat.iteritems(named_index): - first = idx[1:] - second = idx[:-1] - if isinstance(idx, CategoricalIndex): + elif isinstance(idx, CategoricalIndex): pass + elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + assert result.__class__ == answer.__class__ + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: - answer = idx[[0, -1]] - result = first.symmetric_difference(second) + result = first.difference(case) assert tm.equalContents(result, answer) - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.symmetric_difference(case) - elif isinstance(idx, CategoricalIndex): - pass - 
else: + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + result = first.difference([1, 2, 3]) + + +def test_symmetric_difference(idx): + first = idx[1:] + second = idx[:-1] + if isinstance(idx, CategoricalIndex): + pass + else: + answer = idx[[0, -1]] + result = first.symmetric_difference(second) + assert tm.equalContents(result, answer) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assert_raises_regex(ValueError, msg): result = first.symmetric_difference(case) - assert tm.equalContents(result, answer) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - first.symmetric_difference([1, 2, 3]) + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assert_raises_regex(TypeError, msg): + first.symmetric_difference([1, 2, 3]) def test_empty(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index d0782582d70b5..739c212d4c187 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -64,31 +64,30 @@ def test_sort(indices): pytest.raises(TypeError, indices.sort) -def test_numpy_argsort(named_index): - for k, ind in named_index.items(): - result = np.argsort(ind) - expected = ind.argsort() - tm.assert_numpy_array_equal(result, expected) - - # these are the only two types that perform - # pandas compatibility input validation - the - # rest already perform separate (or no) such - # validation via their 'values' attribute as - # defined in 
pandas.core.indexes/base.py - they - # cannot be changed at the moment due to - # backwards compatibility concerns - if isinstance(type(ind), (CategoricalIndex, RangeIndex)): - msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, - np.argsort, ind, axis=1) - - msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, kind='mergesort') - - msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, order=('a', 'b')) +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, + np.argsort, idx, axis=1) + + msg = "the 'kind' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + idx, kind='mergesort') + + msg = "the 'order' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.argsort, + idx, order=('a', 'b')) def test_unsortedindex(): From baaace366e15a770f69fff9a0ba82015309285c9 Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Tue, 26 Jun 2018 12:04:46 -0500 Subject: [PATCH 7/7] split test_multi.py into separate files and address the comments from pull request --- pandas/tests/indexes/multi/conftest.py | 10 +- pandas/tests/indexes/multi/test_analytics.py | 8 + pandas/tests/indexes/multi/test_compat.py | 25 +- .../tests/indexes/multi/test_constructor.py | 78 +--- pandas/tests/indexes/multi/test_contains.py | 36 -- 
pandas/tests/indexes/multi/test_conversion.py | 40 +- pandas/tests/indexes/multi/test_copy.py | 2 +- pandas/tests/indexes/multi/test_drop.py | 22 - .../tests/indexes/multi/test_equivalence.py | 38 +- pandas/tests/indexes/multi/test_get_set.py | 382 ++++-------------- pandas/tests/indexes/multi/test_indexing.py | 310 ++++++++++---- pandas/tests/indexes/multi/test_integrity.py | 117 +----- pandas/tests/indexes/multi/test_missing.py | 145 +++++++ pandas/tests/indexes/multi/test_operations.py | 49 +-- .../indexes/multi/test_partial_indexing.py | 98 +++++ pandas/tests/indexes/multi/test_set_ops.py | 7 - pandas/tests/indexes/multi/test_sorting.py | 121 +++++- ..._dups.py => test_unique_and_duplicates.py} | 43 +- 18 files changed, 771 insertions(+), 760 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_analytics.py create mode 100644 pandas/tests/indexes/multi/test_missing.py create mode 100644 pandas/tests/indexes/multi/test_partial_indexing.py rename pandas/tests/indexes/multi/{test_uniq_dups.py => test_unique_and_duplicates.py} (88%) diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index 47dc62a047be6..6cf9003500b61 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -7,6 +7,8 @@ @pytest.fixture def idx(): + # a MultiIndex used to test the general functionality of the + # general functionality of this object major_axis = Index(['foo', 'bar', 'baz', 'qux']) minor_axis = Index(['one', 'two']) @@ -24,14 +26,18 @@ def idx(): @pytest.fixture def index_names(): + # names that match those in the idx fixture for testing equality of + # names assigned to the idx return ['first', 'second'] @pytest.fixture -def _holder(): +def holder(): + # the MultiIndex constructor used to base compatibility with pickle return MultiIndex @pytest.fixture -def _compat_props(): +def compat_props(): + # a MultiIndex must have these properties associated with it return ['shape', 'ndim', 'size'] 
diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py new file mode 100644 index 0000000000000..072356e4923a6 --- /dev/null +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -0,0 +1,8 @@ +import pytest + + +def test_shift(idx): + + # GH8083 test the base class for shift + pytest.raises(NotImplementedError, idx.shift, 1) + pytest.raises(NotImplementedError, idx.shift, 1, 2) diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index d6f1a4c41bf10..0dfe322c2eef9 100644 --- a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -34,12 +34,8 @@ def test_logical_compat(idx): def test_boolean_context_compat(idx): - # boolean context compat - def f(): - if idx: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) + with pytest.raises(ValueError): + bool(idx) def test_boolean_context_compat2(): @@ -50,11 +46,8 @@ def test_boolean_context_compat2(): i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) common = i1.intersection(i2) - def f(): - if common: - pass - - tm.assert_raises_regex(ValueError, 'The truth value of a', f) + with pytest.raises(ValueError): + bool(common) def test_inplace_mutation_resets_values(): @@ -103,12 +96,12 @@ def test_inplace_mutation_resets_values(): tm.assert_almost_equal(mi2.values, new_values) -def test_ndarray_compat_properties(idx, _compat_props): +def test_ndarray_compat_properties(idx, compat_props): assert idx.T.equals(idx) assert idx.transpose().equals(idx) values = idx.values - for prop in _compat_props: + for prop in compat_props: assert getattr(idx, prop) == getattr(values, prop) # test for validity @@ -120,10 +113,10 @@ def test_compat(indices): assert indices.tolist() == list(indices) -def test_pickle_compat_construction(_holder): +def test_pickle_compat_construction(holder): # this is testing for pickle compat - if _holder is None: + if holder is None: return # need an object 
to create with - pytest.raises(TypeError, _holder) + pytest.raises(TypeError, holder) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 94daaed794254..9577662bda366 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -6,7 +6,7 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import DataFrame, Index, MultiIndex, date_range +from pandas import Index, MultiIndex, date_range from pandas._libs.tslib import Timestamp from pandas.compat import lrange, range from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -91,82 +91,6 @@ def test_copy_in_constructor(): assert mi.levels[0][0] == val -def test_reconstruct_sort(): - - # starts off lexsorted & monotonic - mi = MultiIndex.from_arrays([ - ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] - ]) - assert mi.is_lexsorted() - assert mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert recons.is_lexsorted() - assert recons.is_monotonic - assert mi is recons - - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - # cannot convert to lexsorted - mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), - ('x', 'b'), ('y', 'a'), ('z', 'b')], - names=['one', 'two']) - assert not mi.is_lexsorted() - assert not mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() - assert not recons.is_monotonic - - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - # cannot convert to lexsorted - mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], - labels=[[0, 1, 0, 2], [2, 0, 0, 1]], - names=['col1', 'col2']) - assert not mi.is_lexsorted() - assert not mi.is_monotonic - - recons = mi._sort_levels_monotonic() - assert not recons.is_lexsorted() - assert not recons.is_monotonic - - assert mi.equals(recons) - assert 
Index(mi.values).equals(Index(recons.values)) - - -def test_reconstruct_remove_unused(): - # xref to GH 2770 - df = DataFrame([['deleteMe', 1, 9], - ['keepMe', 2, 9], - ['keepMeToo', 3, 9]], - columns=['first', 'second', 'third']) - df2 = df.set_index(['first', 'second'], drop=False) - df2 = df2[df2['first'] != 'deleteMe'] - - # removed levels are there - expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], - [1, 2, 3]], - labels=[[1, 2], [1, 2]], - names=['first', 'second']) - result = df2.index - tm.assert_index_equal(result, expected) - - expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], - [2, 3]], - labels=[[0, 1], [0, 1]], - names=['first', 'second']) - result = df2.index.remove_unused_levels() - tm.assert_index_equal(result, expected) - - # idempotent - result2 = result.remove_unused_levels() - tm.assert_index_equal(result2, expected) - assert result2.is_(result) - - def test_from_arrays(idx): arrays = [] for lev, lab in zip(idx.levels, idx.labels): diff --git a/pandas/tests/indexes/multi/test_contains.py b/pandas/tests/indexes/multi/test_contains.py index 2c407483211e6..aaed4467816da 100644 --- a/pandas/tests/indexes/multi/test_contains.py +++ b/pandas/tests/indexes/multi/test_contains.py @@ -91,39 +91,3 @@ def test_isin_level_kwarg(): tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) pytest.raises(KeyError, idx.isin, vals_1, level='C') - - -def test_hasnans_isnans(idx): - # GH 11343, added tests for hasnans / isnans - # TODO: remove or change test not valid for MultiIndex - if isinstance(idx, MultiIndex): - pass - # else: - # _index = idx.copy() - - # # cases in indices doesn't include NaN - # expected = np.array([False] * len(_index), dtype=bool) - # tm.assert_numpy_array_equal(_index._isnan, expected) - # assert not _index.hasnans - - # _index = idx.copy() - # values = _index.values - - # if len(idx) == 0: - # continue - # elif isinstance(idx, DatetimeIndexOpsMixin): - # values[1] = iNaT - # elif isinstance(idx, 
(Int64Index, UInt64Index)): - # continue - # else: - # values[1] = np.nan - - # if isinstance(idx, PeriodIndex): - # _index = idx.__class__(values, freq=idx.freq) - # else: - # _index = idx.__class__(values) - - # expected = np.array([False] * len(_index), dtype=bool) - # expected[1] = True - # tm.assert_numpy_array_equal(_index._isnan, expected) - # assert _index.hasnans diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 2ec3eff92be13..ff99941ba9948 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -87,12 +87,10 @@ def test_to_hierarchical(): assert result.names == index.names -def test_legacy_pickle(): - if PY3: - pytest.skip("testing for legacy pickles not " - "support on py3") +@pytest.mark.skipif(PY3, reason="testing legacy pickles not support on py3") +def test_legacy_pickle(datapath): - path = tm.get_data_path('multiindex_v1.pickle') + path = datapath('indexes', 'multi', 'data', 'multiindex_v1.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -109,10 +107,10 @@ def test_legacy_pickle(): assert_almost_equal(exp, exp2) -def test_legacy_v2_unpickle(): +def test_legacy_v2_unpickle(datapath): # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') + path = datapath('indexes', 'multi', 'data', 'mindex_073.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -148,3 +146,31 @@ def test_pickle(indices): unpickled = tm.round_trip_pickle(indices) assert indices.equals(unpickled) indices.name = original_name + + +def test_to_series(idx): + # assert that we are creating a copy of the index + + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + + +def test_to_series_with_arguments(idx): + # GH18699 + + # index kwarg + s = idx.to_series(index=idx) + + assert s.values is not idx.values + assert s.index is idx 
+ assert s.name == idx.name + + # name kwarg + idx = idx + s = idx.to_series(name='__test') + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index ef40375074de5..282f2fa84efe0 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from copy import copy, deepcopy import pandas.util.testing as tm from pandas import (CategoricalIndex, IntervalIndex, MultiIndex, PeriodIndex, @@ -111,7 +112,6 @@ def test_ensure_copied_data(idx): def test_copy_and_deepcopy(indices): - from copy import copy, deepcopy if isinstance(indices, MultiIndex): return diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index eac2feea3103f..281db7fd2c8a7 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -124,25 +124,3 @@ def test_drop_not_lexsorted(): with tm.assert_produces_warning(PerformanceWarning): tm.assert_index_equal(lexsorted_mi.drop('a'), not_lexsorted_mi.drop('a')) - - -def test_dropna(): - # GH 6194 - idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5], - [1, 2, np.nan, np.nan, 5], - ['a', 'b', 'c', np.nan, 'e']]) - - exp = pd.MultiIndex.from_arrays([[1, 5], - [1, 5], - ['a', 'e']]) - tm.assert_index_equal(idx.dropna(), exp) - tm.assert_index_equal(idx.dropna(how='any'), exp) - - exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], - [1, 2, np.nan, 5], - ['a', 'b', 'c', 'e']]) - tm.assert_index_equal(idx.dropna(how='all'), exp) - - msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): - idx.dropna(how='xxx') diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index c6b4452cf4a2f..0bebe3165e2e8 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ 
b/pandas/tests/indexes/multi/test_equivalence.py @@ -4,8 +4,7 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -from pandas import (Index, MultiIndex, PeriodIndex, RangeIndex, Series, compat, - isna) +from pandas import Index, MultiIndex, RangeIndex, Series, compat from pandas.compat import lrange, lzip, range @@ -33,8 +32,6 @@ def test_equals(idx): def test_equals_op(idx): # GH9947, GH10637 index_a = idx - if isinstance(index_a, PeriodIndex): - return n = len(index_a) index_b = index_a[0:-1] @@ -208,37 +205,10 @@ def test_is_numeric(idx): assert not idx.is_numeric() -def test_nulls(idx): - # this is really a smoke test for the methods - # as these are adequately tested for function elsewhere - - # TODO: Remove or Refactor. MultiIndex not Implemeted. - for name, index in [('idx', idx), ]: - if len(index) == 0: - tm.assert_numpy_array_equal( - index.isna(), np.array([], dtype=bool)) - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): - idx.isna() - else: - - if not index.hasnans: - tm.assert_numpy_array_equal( - index.isna(), np.zeros(len(index), dtype=bool)) - tm.assert_numpy_array_equal( - index.notna(), np.ones(len(index), dtype=bool)) - else: - result = isna(index) - tm.assert_numpy_array_equal(index.isna(), result) - tm.assert_numpy_array_equal(index.notna(), ~result) - - def test_multiindex_compare(): - # GH 21149 - # Ensure comparison operations for MultiIndex with nlevels == 1 - # behave consistently with those for MultiIndex with nlevels > 1 + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 midx = pd.MultiIndex.from_product([[0, 1]]) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 4324f0dadc05d..56fd4c04cb96e 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ 
b/pandas/tests/indexes/multi/test_get_set.py @@ -5,182 +5,105 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import CategoricalIndex, Index, IntervalIndex, MultiIndex -from pandas.compat import lrange, range -from pandas.core.indexes.base import InvalidIndexError -from pandas.util.testing import assert_almost_equal - - -def test_get_loc(idx): - assert idx.get_loc(('foo', 'two')) == 1 - assert idx.get_loc(('baz', 'two')) == 3 - pytest.raises(KeyError, idx.get_loc, ('bar', 'two')) - pytest.raises(KeyError, idx.get_loc, 'quux') - - pytest.raises(NotImplementedError, idx.get_loc, 'foo', - method='nearest') - - # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - pytest.raises(KeyError, index.get_loc, (1, 1)) - assert index.get_loc((2, 0)) == slice(3, 5) - - -def test_get_loc_duplicates(): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - assert result == expected - # pytest.raises(Exception, index.get_loc, 2) - - index = Index(['c', 'a', 'a', 'b', 'b']) - rs = index.get_loc('c') - xp = 0 - assert rs == xp - - -def test_get_loc_level(): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = index[expected].droplevel(0).droplevel(0) - assert loc == expected - assert new_index.equals(exp_index) - - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - assert loc == expected - assert new_index is None - - pytest.raises(KeyError, index.get_loc_level, (2, 2)) - - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( - [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) - result, new_index = 
index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - assert result == expected - assert new_index.equals(index.droplevel(0)) - - -@pytest.mark.parametrize('level', [0, 1]) -@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None]) -def test_get_loc_nan(level, null_val): - # GH 18485 : NaN in MultiIndex - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - levels[level] = np.array([0, null_val], dtype=type(null_val)) - key[level] = null_val - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - -def test_get_loc_missing_nan(): - # GH 8569 - idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) - assert isinstance(idx.get_loc(1), slice) - pytest.raises(KeyError, idx.get_loc, 3) - pytest.raises(KeyError, idx.get_loc, np.nan) - pytest.raises(KeyError, idx.get_loc, [np.nan]) +from pandas import CategoricalIndex, Index, MultiIndex +from pandas.compat import range -@pytest.mark.parametrize('dtype1', [int, float, bool, str]) -@pytest.mark.parametrize('dtype2', [int, float, bool, str]) -def test_get_loc_multiple_dtypes(dtype1, dtype2): - # GH 18520 - levels = [np.array([0, 1]).astype(dtype1), - np.array([0, 1]).astype(dtype2)] - idx = pd.MultiIndex.from_product(levels) - assert idx.get_loc(idx[2]) == 2 - - -@pytest.mark.parametrize('level', [0, 1]) -@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) -def test_get_loc_implicit_cast(level, dtypes): - # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa - levels = [['a', 'b'], ['c', 'd']] - key = ['b', 'd'] - lev_dtype, key_dtype = dtypes - levels[level] = np.array([0, 1], dtype=lev_dtype) - key[level] = key_dtype(1) - idx = MultiIndex.from_product(levels) - assert idx.get_loc(tuple(key)) == 3 - - -def test_get_loc_cast_bool(): - # GH 19086 : int is casted to bool, but not vice-versa - levels = [[False, True], np.arange(2, dtype='int64')] - idx = MultiIndex.from_product(levels) +def test_get_level_number_integer(idx): + idx.names = [1, 
0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + pytest.raises(IndexError, idx._get_level_number, 2) + tm.assert_raises_regex(KeyError, 'Level fourth not found', + idx._get_level_number, 'fourth') - assert idx.get_loc((0, 1)) == 1 - assert idx.get_loc((1, 0)) == 2 - pytest.raises(KeyError, idx.get_loc, (False, True)) - pytest.raises(KeyError, idx.get_loc, (True, False)) +def test_get_level_values(idx): + result = idx.get_level_values(0) + expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], + name='first') + tm.assert_index_equal(result, expected) + assert result.name == 'first' + result = idx.get_level_values('first') + expected = idx.get_level_values(0) + tm.assert_index_equal(result, expected) -def test_get_indexer(): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(['A', 'B']), + CategoricalIndex([1, 2, 3])], + labels=[np.array([0, 0, 0, 1, 1, 1]), + np.array([0, 1, 2, 0, 1, 2])]) - major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) +def test_get_value_duplicates(): + index = MultiIndex(levels=[['D', 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) - r1 = idx2.get_indexer(idx1, method='pad') - e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) - assert_almost_equal(r1, e1) + assert index.get_loc('D') == slice(0, 3) + with 
pytest.raises(KeyError): + index._engine.get_value(np.array([]), 'D') - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) +def test_get_level_values_all_na(): + # GH 17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) - assert_almost_equal(r1, e1) + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) +def test_get_level_values_int_with_na(): + # GH 17924 + arrays = [['a', 'b', 'b'], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2.values) - rexp1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, rexp1) + arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) - r1 = idx1.get_indexer([1, 2, 3]) - assert (r1 == [-1, -1, -1]).all() - # create index with duplicates - idx1 = Index(lrange(10) + lrange(10)) - idx2 = Index(lrange(20)) +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan]) + 
tm.assert_index_equal(result, expected) - msg = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, msg): - idx1.get_indexer(idx2) + result = index.get_level_values(1) + expected = pd.Index(['a', np.nan, 1]) + tm.assert_index_equal(result, expected) + arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) -def test_get_indexer_nearest(): - midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='nearest') - with pytest.raises(NotImplementedError): - midx.get_indexer(['a'], method='pad', tolerance=2) + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) def test_set_name_methods(idx, index_names): @@ -498,152 +421,3 @@ def test_set_value_keeps_names(): df.at[('grethe', '4'), 'one'] = 99.34 assert df._is_copy is None assert df.index.names == ('Name', 'Number') - - -def test_get_level_number_integer(idx): - idx.names = [1, 0] - assert idx._get_level_number(1) == 0 - assert idx._get_level_number(0) == 1 - pytest.raises(IndexError, idx._get_level_number, 2) - tm.assert_raises_regex(KeyError, 'Level fourth not found', - idx._get_level_number, 'fourth') - - -def test_get_level_values(idx): - result = idx.get_level_values(0) - expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'], - name='first') - tm.assert_index_equal(result, expected) - assert result.name == 'first' - - result = idx.get_level_values('first') - expected = idx.get_level_values(0) - tm.assert_index_equal(result, expected) - - # GH 10460 - index = MultiIndex( - levels=[CategoricalIndex(['A', 'B']), - CategoricalIndex([1, 2, 3])], - labels=[np.array([0, 0, 0, 1, 1, 1]), - 
np.array([0, 1, 2, 0, 1, 2])]) - - exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) - tm.assert_index_equal(index.get_level_values(0), exp) - exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) - tm.assert_index_equal(index.get_level_values(1), exp) - - -def test_getitem(idx): - # scalar - assert idx[2] == ('bar', 'one') - - # slice - result = idx[2:5] - expected = idx[[2, 3, 4]] - assert result.equals(expected) - - # boolean - result = idx[[True, False, True, False, True, True]] - result2 = idx[np.array([True, False, True, False, True, True])] - expected = idx[[0, 2, 4, 5]] - assert result.equals(expected) - assert result2.equals(expected) - - -def test_getitem_group_select(idx): - sorted_idx, _ = idx.sortlevel(0) - assert sorted_idx.get_loc('baz') == slice(3, 4) - assert sorted_idx.get_loc('foo') == slice(0, 2) - - -def test_get_value_duplicates(): - index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - - assert index.get_loc('D') == slice(0, 3) - with pytest.raises(KeyError): - index._engine.get_value(np.array([]), 'D') - - -def test_get_level_values_all_na(): - # GH 17924 when level entirely consists of nan - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) - tm.assert_index_equal(result, expected) - - result = index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1], dtype=object) - tm.assert_index_equal(result, expected) - - -def test_get_level_values_int_with_na(): - # GH 17924 - arrays = [['a', 'b', 'b'], [1, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = Index([1, np.nan, 2]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - 
result = index.get_level_values(1) - expected = Index([np.nan, np.nan, 2]) - tm.assert_index_equal(result, expected) - - -def test_get_level_values_na(): - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([np.nan, np.nan, np.nan]) - tm.assert_index_equal(result, expected) - - result = index.get_level_values(1) - expected = pd.Index(['a', np.nan, 1]) - tm.assert_index_equal(result, expected) - - arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(1) - expected = pd.DatetimeIndex([0, 1, pd.NaT]) - tm.assert_index_equal(result, expected) - - arrays = [[], []] - index = pd.MultiIndex.from_arrays(arrays) - result = index.get_level_values(0) - expected = pd.Index([], dtype=object) - tm.assert_index_equal(result, expected) - - -def test_get_unique_index(idx): - idx = idx[[0, 1, 0, 1, 1, 0, 0]] - expected = idx._shallow_copy(idx[[0, 1]]) - - for dropna in [False, True]: - result = idx._get_unique_index(dropna=dropna) - assert result.unique - tm.assert_index_equal(result, expected) - - -def test_get_indexer_consistency(idx): - # See GH 16819 - if isinstance(idx, IntervalIndex): - pass - - if idx.is_unique or isinstance(idx, CategoricalIndex): - indexer = idx.get_indexer(idx[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp - else: - e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = idx.get_indexer(idx[0:2]) - - indexer, _ = idx.get_indexer_non_unique(idx[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index dc00ed4abde10..0b528541e5eb6 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py 
@@ -4,11 +4,15 @@ from datetime import timedelta import numpy as np -import pandas.util.testing as tm import pytest -from pandas import Index, Int64Index, MultiIndex, PeriodIndex, UInt64Index + +import pandas as pd +import pandas.util.testing as tm +from pandas import (Categorical, CategoricalIndex, Index, IntervalIndex, + MultiIndex, date_range) from pandas.compat import lrange -from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin, iNaT +from pandas.core.indexes.base import InvalidIndexError +from pandas.util.testing import assert_almost_equal def test_slice_locs_partial(idx): @@ -105,41 +109,6 @@ def test_slice_locs_not_contained(): assert result == (0, len(index)) -def test_to_series(idx): - # assert that we are creating a copy of the index - - s = idx.to_series() - assert s.values is not idx.values - assert s.index is not idx - assert s.name == idx.name - - -def test_to_series_with_arguments(idx): - # GH18699 - - # index kwarg - s = idx.to_series(index=idx) - - assert s.values is not idx.values - assert s.index is idx - assert s.name == idx.name - - # name kwarg - idx = idx - s = idx.to_series(name='__test') - - assert s.values is not idx.values - assert s.index is not idx - assert s.name != idx.name - - -def test_shift(idx): - - # GH8083 test the base class for shift - pytest.raises(NotImplementedError, idx.shift, 1) - pytest.raises(NotImplementedError, idx.shift, 1, 2) - - def test_insert_base(idx): result = idx[1:4] @@ -165,49 +134,6 @@ def test_delete_base(idx): result = idx.delete(len(idx)) -def test_fillna(idx): - # GH 11343 - - # TODO: Remove or Refactor. 
Not Implemented for MultiIndex - for name, index in [('idx', idx), ]: - if len(index) == 0: - pass - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): - idx.fillna(idx[0]) - else: - idx = index.copy() - result = idx.fillna(idx[0]) - tm.assert_index_equal(result, idx) - assert result is not idx - - msg = "'value' must be a scalar, passed: " - with tm.assert_raises_regex(TypeError, msg): - idx.fillna([idx[0]]) - - idx = index.copy() - values = idx.values - - if isinstance(index, DatetimeIndexOpsMixin): - values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - tm.assert_numpy_array_equal(idx._isnan, expected) - assert idx.hasnans - - def test_putmask_with_wrong_mask(idx): # GH18368 @@ -219,3 +145,225 @@ def test_putmask_with_wrong_mask(idx): with pytest.raises(ValueError): idx.putmask('foo', 1) + + +def test_get_indexer(): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = 
np.array([0, 0, 1, 1, 2], dtype=np.intp) + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='backfill') + assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(lrange(10) + lrange(10)) + idx2 = Index(lrange(20)) + + msg = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, msg): + idx1.get_indexer(idx2) + + +def test_get_indexer_nearest(): + midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='nearest') + with pytest.raises(NotImplementedError): + midx.get_indexer(['a'], method='pad', tolerance=2) + + +def test_getitem(idx): + # scalar + assert idx[2] == ('bar', 'one') + + # slice + result = idx[2:5] + expected = idx[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) + assert sorted_idx.get_loc('baz') == slice(3, 4) + assert sorted_idx.get_loc('foo') == slice(0, 2) + + +def test_get_indexer_consistency(idx): + # See GH 16819 + if isinstance(idx, IntervalIndex): + pass + + if idx.is_unique or isinstance(idx, CategoricalIndex): + indexer = idx.get_indexer(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = idx.get_indexer(idx[0:2]) + + 
indexer, _ = idx.get_indexer_non_unique(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + +def test_get_loc(idx): + assert idx.get_loc(('foo', 'two')) == 1 + assert idx.get_loc(('baz', 'two')) == 3 + pytest.raises(KeyError, idx.get_loc, ('bar', 'two')) + pytest.raises(KeyError, idx.get_loc, 'quux') + + pytest.raises(NotImplementedError, idx.get_loc, 'foo', + method='nearest') + + # 3 levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + pytest.raises(KeyError, index.get_loc, (1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + +def test_get_loc_duplicates(): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert rs == xp + + +def test_get_loc_level(): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + pytest.raises(KeyError, index.get_loc_level, (2, 2)) + + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + +@pytest.mark.parametrize('dtype1', [int, float, bool, str]) 
+@pytest.mark.parametrize('dtype2', [int, float, bool, str]) +def test_get_loc_multiple_dtypes(dtype1, dtype2): + # GH 18520 + levels = [np.array([0, 1]).astype(dtype1), + np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + +@pytest.mark.parametrize('level', [0, 1]) +@pytest.mark.parametrize('dtypes', [[int, float], [float, int]]) +def test_get_loc_implicit_cast(level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_cast_bool(): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype='int64')] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + pytest.raises(KeyError, idx.get_loc, (False, True)) + pytest.raises(KeyError, idx.get_loc, (True, False)) + + +@pytest.mark.parametrize('level', [0, 1]) +def test_get_loc_nan(level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + levels = [['a', 'b'], ['c', 'd']] + key = ['b', 'd'] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_missing_nan(): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + pytest.raises(KeyError, idx.get_loc, 3) + pytest.raises(KeyError, idx.get_loc, np.nan) + pytest.raises(KeyError, idx.get_loc, [np.nan]) + + +def test_get_indexer_categorical_time(): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [Categorical(['a', 'b', 'c']), + Categorical(date_range("2012-01-01", periods=3, 
freq='H'))]) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 859c85a7bcc38..7a8f8b60d31ba 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -6,8 +6,7 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import (DataFrame, IntervalIndex, MultiIndex, - RangeIndex, date_range) +from pandas import IntervalIndex, MultiIndex, RangeIndex from pandas.compat import lrange, range from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -177,28 +176,6 @@ def test_large_multiindex_error(): df_above_1000000.loc[(3, 0), 'dest'] -def test_nan_stays_float(): - - # GH 7031 - idx0 = pd.MultiIndex(levels=[["A", "B"], []], - labels=[[1, 0], [-1, -1]], - names=[0, 1]) - idx1 = pd.MultiIndex(levels=[["C"], ["D"]], - labels=[[0], [0]], - names=[0, 1]) - idxm = idx0.join(idx1, how='outer') - assert pd.isna(idx0.get_level_values(1)).all() - # the following failed in 0.14.1 - assert pd.isna(idxm.get_level_values(1)[:-1]).all() - - df0 = pd.DataFrame([[1, 2]], index=idx0) - df1 = pd.DataFrame([[3, 4]], index=idx1) - dfm = df0 - df1 - assert pd.isna(df0.index.get_level_values(1)).all() - # the following failed in 0.14.1 - assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() - - def test_million_record_attribute_error(): # GH 18165 r = list(range(1000000)) @@ -244,98 +221,6 @@ def test_level_setting_resets_attributes(): assert not ind.is_monotonic -def test_partial_string_timestamp_multiindex(): - # GH10331 - dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H') - abc = ['a', 'b', 'c'] - ix = pd.MultiIndex.from_product([dr, abc]) - df = pd.DataFrame({'c1': range(0, 15)}, index=ix) - idx = pd.IndexSlice - - # c1 - # 2016-01-01 00:00:00 a 0 - # b 1 - # c 2 - # 2016-01-01 12:00:00 a 3 - # b 4 - # c 5 - # 2016-01-02 
00:00:00 a 6 - # b 7 - # c 8 - # 2016-01-02 12:00:00 a 9 - # b 10 - # c 11 - # 2016-01-03 00:00:00 a 12 - # b 13 - # c 14 - - # partial string matching on a single index - for df_swap in (df.swaplevel(), - df.swaplevel(0), - df.swaplevel(0, 1)): - df_swap = df_swap.sort_index() - just_a = df_swap.loc['a'] - result = just_a.loc['2016-01-01'] - expected = df.loc[idx[:, 'a'], :].iloc[0:2] - expected.index = expected.index.droplevel(1) - tm.assert_frame_equal(result, expected) - - # indexing with IndexSlice - result = df.loc[idx['2016-01-01':'2016-02-01', :], :] - expected = df - tm.assert_frame_equal(result, expected) - - # match on secondary index - result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :] - expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] - tm.assert_frame_equal(result, expected) - - # Even though this syntax works on a single index, this is somewhat - # ambiguous and we don't want to extend this behavior forward to work - # in multi-indexes. This would amount to selecting a scalar from a - # column. 
- with pytest.raises(KeyError): - df['2016-01-01'] - - # partial string match on year only - result = df.loc['2016'] - expected = df - tm.assert_frame_equal(result, expected) - - # partial string match on date - result = df.loc['2016-01-01'] - expected = df.iloc[0:6] - tm.assert_frame_equal(result, expected) - - # partial string match on date and hour, from middle - result = df.loc['2016-01-02 12'] - expected = df.iloc[9:12] - tm.assert_frame_equal(result, expected) - - # partial string match on secondary index - result = df_swap.loc[idx[:, '2016-01-02'], :] - expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] - tm.assert_frame_equal(result, expected) - - # tuple selector with partial string match on date - result = df.loc[('2016-01-01', 'a'), :] - expected = df.iloc[[0, 3]] - tm.assert_frame_equal(result, expected) - - # Slicing date on first level should break (of course) - with pytest.raises(KeyError): - df_swap.loc['2016-01-01'] - - # GH12685 (partial string with daily resolution or below) - dr = date_range('2013-01-01', periods=100, freq='D') - ix = MultiIndex.from_product([dr, ['a', 'b']]) - df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix) - - result = df.loc[idx['2013-03':'2013-03', :], :] - expected = df.iloc[118:180] - tm.assert_frame_equal(result, expected) - - def test_rangeindex_fallback_coercion_bug(): # GH 12893 foo = pd.DataFrame(np.arange(100).reshape((10, 10))) diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py new file mode 100644 index 0000000000000..01465ea4c2f3b --- /dev/null +++ b/pandas/tests/indexes/multi/test_missing.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest +from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index, isna +from pandas._libs.tslib import iNaT +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +def test_fillna(idx): + # GH 11343 + + # TODO: 
Remove or Refactor. Not Implemented for MultiIndex + for name, index in [('idx', idx), ]: + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with tm.assert_raises_regex(TypeError, msg): + idx.fillna([idx[0]]) + + idx = index.copy() + values = idx.values + + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans + + +def test_dropna(): + # GH 6194 + idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ['a', 'b', 'c', np.nan, 'e']]) + + exp = pd.MultiIndex.from_arrays([[1, 5], + [1, 5], + ['a', 'e']]) + tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how='any'), exp) + + exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], + [1, 2, np.nan, 5], + ['a', 'b', 'c', 'e']]) + tm.assert_index_equal(idx.dropna(how='all'), exp) + + msg = "invalid how option: xxx" + with tm.assert_raises_regex(ValueError, msg): + idx.dropna(how='xxx') + + +def test_nulls(idx): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + + # TODO: Remove or Refactor. MultiIndex not Implemeted. 
+ for name, index in [('idx', idx), ]: + if len(index) == 0: + tm.assert_numpy_array_equal( + index.isna(), np.array([], dtype=bool)) + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with tm.assert_raises_regex(NotImplementedError, msg): + idx.isna() + else: + + if not index.hasnans: + tm.assert_numpy_array_equal( + index.isna(), np.zeros(len(index), dtype=bool)) + tm.assert_numpy_array_equal( + index.notna(), np.ones(len(index), dtype=bool)) + else: + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) + + +@pytest.mark.xfail +def test_hasnans_isnans(idx): + # GH 11343, added tests for hasnans / isnans + index = idx.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(index), dtype=bool) + tm.assert_numpy_array_equal(index._isnan, expected) + assert not index.hasnans + + index = idx.copy() + values = index.values + values[1] = np.nan + + index = idx.__class__(values) + + expected = np.array([False] * len(index), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = pd.MultiIndex(levels=[["A", "B"], []], + labels=[[1, 0], [-1, -1]], + names=[0, 1]) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], + labels=[[0], [0]], + names=[0, 1]) + idxm = idx0.join(idx1, how='outer') + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() diff --git a/pandas/tests/indexes/multi/test_operations.py b/pandas/tests/indexes/multi/test_operations.py index a787e770f65e7..d38cb28039595 
100644 --- a/pandas/tests/indexes/multi/test_operations.py +++ b/pandas/tests/indexes/multi/test_operations.py @@ -4,9 +4,9 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import (DataFrame, DatetimeIndex, Float64Index, Index, Int64Index, - MultiIndex, PeriodIndex, TimedeltaIndex, UInt64Index, - date_range, period_range) +from pandas import (DatetimeIndex, Float64Index, Index, Int64Index, MultiIndex, + PeriodIndex, TimedeltaIndex, UInt64Index, date_range, + period_range) from pandas.compat import lrange, range from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -188,34 +188,6 @@ def test_astype_category(idx, ordered): idx.astype('category') -@pytest.mark.parametrize('first_type,second_type', [ - ('int64', 'int64'), - ('datetime64[D]', 'str')]) -def test_remove_unused_levels_large(first_type, second_type): - # GH16556 - - # because tests should be deterministic (and this test in particular - # checks that levels are removed, which is not the case for every - # random input): - rng = np.random.RandomState(4) # seed is arbitrary value that works - - size = 1 << 16 - df = DataFrame(dict( - first=rng.randint(0, 1 << 13, size).astype(first_type), - second=rng.randint(0, 1 << 10, size).astype(second_type), - third=rng.rand(size))) - df = df.groupby(['first', 'second']).sum() - df = df[df.third < 0.1] - - result = df.index.remove_unused_levels() - assert len(result.levels[0]) < len(df.index.levels[0]) - assert len(result.levels[1]) < len(df.index.levels[1]) - assert result.equals(df.index) - - expected = df.reset_index().set_index(['first', 'second']).index - tm.assert_index_equal(result, expected) - - def test_repeat(): reps = 2 numbers = [1, 2, 3] @@ -382,21 +354,6 @@ def test_argsort(idx): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize('level0', [['a', 'd', 'b'], - ['a', 'd', 'b', 'unused']]) -@pytest.mark.parametrize('level1', [['w', 'x', 
'y', 'z'], - ['w', 'x', 'y', 'z', 'unused']]) -def test_remove_unused_nan(level0, level1): - # GH 18417 - mi = pd.MultiIndex(levels=[level0, level1], - labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) - - result = mi.remove_unused_levels() - tm.assert_index_equal(result, mi) - for level in 0, 1: - assert('unused' not in result.levels[level]) - - def test_map(idx): # callable index = idx diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py new file mode 100644 index 0000000000000..40e5e26e9cb0f --- /dev/null +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -0,0 +1,98 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas import DataFrame, MultiIndex, date_range + + +def test_partial_string_timestamp_multiindex(): + # GH10331 + dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H') + abc = ['a', 'b', 'c'] + ix = pd.MultiIndex.from_product([dr, abc]) + df = pd.DataFrame({'c1': range(0, 15)}, index=ix) + idx = pd.IndexSlice + + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + + # partial string matching on a single index + for df_swap in (df.swaplevel(), + df.swaplevel(0), + df.swaplevel(0, 1)): + df_swap = df_swap.sort_index() + just_a = df_swap.loc['a'] + result = just_a.loc['2016-01-01'] + expected = df.loc[idx[:, 'a'], :].iloc[0:2] + expected.index = expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + # indexing with IndexSlice + result = df.loc[idx['2016-01-01':'2016-02-01', :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # Even though 
this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. + with pytest.raises(KeyError): + df['2016-01-01'] + + # partial string match on year only + result = df.loc['2016'] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc['2016-01-01'] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc['2016-01-02 12'] + expected = df.iloc[9:12] + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[idx[:, '2016-01-02'], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + result = df.loc[('2016-01-01', 'a'), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) + with pytest.raises(KeyError): + df_swap.loc['2016-01-01'] + + # GH12685 (partial string with daily resolution or below) + dr = date_range('2013-01-01', periods=100, freq='D') + ix = MultiIndex.from_product([dr, ['a', 'b']]) + df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix) + + result = df.loc[idx['2013-03':'2013-03', :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 8a4c2e9f3fcbb..79a3837aac7f8 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -150,13 +150,6 @@ def test_empty(idx): assert idx[:0].empty -def test_unique_na(): - idx = pd.Index([2, np.nan, 2, 1], name='my_index') - expected = pd.Index([2, np.nan, 1], name='my_index') - result = idx.unique() - tm.assert_index_equal(result, 
expected) - - def test_difference(idx): first = idx diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 739c212d4c187..d6165c17c6717 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm import pytest -from pandas import CategoricalIndex, DataFrame, MultiIndex, RangeIndex +from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex from pandas.compat import lrange from pandas.errors import PerformanceWarning, UnsortedIndexError @@ -135,3 +135,122 @@ def test_unsortedindex_doc_examples(): assert dfm.index.is_lexsorted() assert dfm.index.lexsort_depth == 2 + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'), + ('x', 'b'), ('y', 'a'), ('z', 'b')], + names=['one', 'two']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=['col1', 'col2']) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + 
+def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame([['deleteMe', 1, 9], + ['keepMe', 2, 9], + ['keepMeToo', 3, 9]], + columns=['first', 'second', 'third']) + df2 = df.set_index(['first', 'second'], drop=False) + df2 = df2[df2['first'] != 'deleteMe'] + + # removed levels are there + expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'], + [1, 2, 3]], + labels=[[1, 2], [1, 2]], + names=['first', 'second']) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex(levels=[['keepMe', 'keepMeToo'], + [2, 3]], + labels=[[0, 1], [0, 1]], + names=['first', 'second']) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize('first_type,second_type', [ + ('int64', 'int64'), + ('datetime64[D]', 'str')]) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame(dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 1 << 10, size).astype(second_type), + third=rng.rand(size))) + df = df.groupby(['first', 'second']).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(['first', 'second']).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('level0', [['a', 'd', 'b'], + ['a', 'd', 'b', 'unused']]) +@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'], + ['w', 'x', 'y', 'z', 
'unused']]) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = pd.MultiIndex(levels=[level0, level1], + labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]]) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert('unused' not in result.levels[level]) diff --git a/pandas/tests/indexes/multi/test_uniq_dups.py b/pandas/tests/indexes/multi/test_unique_and_duplicates.py similarity index 88% rename from pandas/tests/indexes/multi/test_uniq_dups.py rename to pandas/tests/indexes/multi/test_unique_and_duplicates.py index eed8094db7c51..a97d84ace9602 100644 --- a/pandas/tests/indexes/multi/test_uniq_dups.py +++ b/pandas/tests/indexes/multi/test_unique_and_duplicates.py @@ -89,22 +89,22 @@ def test_duplicate_multiindex_labels(): inplace=True) -@pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], - ['1', 'a', '1']]) +@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], + [1, 'a', 1]]) def test_duplicate_level_names(names): - # GH18872 - pytest.raises(ValueError, pd.MultiIndex.from_product, - [[0, 1]] * 3, names=names) + # GH18872, GH19029 + mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names) + assert mi.names == names # With .rename() mi = pd.MultiIndex.from_product([[0, 1]] * 3) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names) + mi = mi.rename(names) + assert mi.names == names # With .rename(., level=) - mi.rename(names[0], level=1, inplace=True) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names[:2], level=[0, 2]) + mi.rename(names[1], level=1, inplace=True) + mi = mi.rename([names[0], names[2]], level=[0, 2]) + assert mi.names == names def test_duplicate_meta_data(): @@ -234,3 +234,26 @@ def f(a): tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( len(mi), dtype='bool')) + + +def test_get_unique_index(idx): + idx = idx[[0, 1, 0, 1, 1, 0, 0]] + expected = idx._shallow_copy(idx[[0, 1]]) + + for dropna in [False, True]: + 
result = idx._get_unique_index(dropna=dropna) + assert result.unique + tm.assert_index_equal(result, expected) + + +def test_unique_na(): + idx = pd.Index([2, np.nan, 2, 1], name='my_index') + expected = pd.Index([2, np.nan, 1], name='my_index') + result = idx.unique() + tm.assert_index_equal(result, expected) + + +def test_duplicate_level_names_access_raises(idx): + idx.names = ['foo', 'foo'] + tm.assert_raises_regex(KeyError, 'Level foo not found', + idx._get_level_number, 'foo')