From 9da121e857994fcde74bd63ce6030519aa6818c6 Mon Sep 17 00:00:00 2001 From: mtrbean Date: Tue, 29 Jul 2014 13:42:45 -0700 Subject: [PATCH] ENH: Add optional level argument to set_names(), set_levels() and set_labels() (GH7792) --- doc/source/indexing.rst | 11 ++ doc/source/v0.15.0.txt | 10 ++ pandas/core/index.py | 210 ++++++++++++++++++++++++++++++------- pandas/tests/test_index.py | 140 +++++++++++++++++++++++-- 4 files changed, 326 insertions(+), 45 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 837e3b386f3d0..023c200e271ab 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -2162,6 +2162,17 @@ you can specify ``inplace=True`` to have the data change in place. ind.name = "bob" ind +.. versionadded:: 0.15.0 + +``set_names``, ``set_levels``, and ``set_labels`` also take an optional +``level`` argument + +.. ipython:: python + + index + index.levels[1] + index.set_levels(["a", "b"], level=1) + Adding an index to an existing DataFrame ---------------------------------------- diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 9279d8b0288c4..2e5ec8e2f4193 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -35,6 +35,16 @@ API changes levels aren't all level names or all level numbers. See :ref:`Reshaping by stacking and unstacking `. +- :func:`set_names`, :func:`set_labels`, and :func:`set_levels` methods now take an optional ``level`` keyword argument to allow modification of specific level(s) of a MultiIndex. Additionally :func:`set_names` now accepts a scalar string value when operating on an ``Index`` or on a specific level of a ``MultiIndex`` (:issue:`7792`) + + .. 
ipython:: python + + idx = pandas.MultiIndex.from_product([['a'], range(3), list("pqr")], names=['foo', 'bar', 'baz']) + idx.set_names('qux', level=0) + idx.set_names(['qux','baz'], level=[0,1]) + idx.set_levels(['a','b','c'], level='bar') + idx.set_levels([['a','b','c'],[1,2,3]], level=[1,2]) + - Raise a ``ValueError`` in ``df.to_hdf`` with 'fixed' format, if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`) - :func:`rolling_min`, :func:`rolling_max`, :func:`rolling_cov`, and :func:`rolling_corr` diff --git a/pandas/core/index.py b/pandas/core/index.py index 81602d5240a08..8c43511866e9a 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -362,7 +362,7 @@ def nlevels(self): def _get_names(self): return FrozenList((self.name,)) - def _set_names(self, values): + def _set_names(self, values, level=None): if len(values) != 1: raise ValueError('Length of new names must be 1, got %d' % len(values)) @@ -370,28 +370,61 @@ def _set_names(self, values): names = property(fset=_set_names, fget=_get_names) - def set_names(self, names, inplace=False): + def set_names(self, names, level=None, inplace=False): """ Set new names on index. Defaults to returning new index. 
Parameters ---------- - names : sequence - names to set + names : str or sequence + name(s) to set + level : int or level name, or sequence of int / level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None for all levels) + Otherwise level must be None inplace : bool if True, mutates in place Returns ------- new index (of same type and class...etc) [if inplace, returns None] + + Examples + -------- + >>> Index([1, 2, 3, 4]).set_names('foo') + Int64Index([1, 2, 3, 4], dtype='int64') + >>> Index([1, 2, 3, 4]).set_names(['foo']) + Int64Index([1, 2, 3, 4], dtype='int64') + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_names(['baz', 'quz']) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'baz', u'quz']) + >>> idx.set_names('baz', level=0) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'baz', u'bar']) """ - if not com.is_list_like(names): + if level is not None and self.nlevels == 1: + raise ValueError('Level must be None for non-MultiIndex') + + if level is not None and not com.is_list_like(level) and com.is_list_like(names): + raise TypeError("Names must be a string") + + if not com.is_list_like(names) and level is None and self.nlevels > 1: raise TypeError("Must pass list-like as `names`.") + + if not com.is_list_like(names): + names = [names] + if level is not None and not com.is_list_like(level): + level = [level] + if inplace: idx = self else: idx = self._shallow_copy() - idx._set_names(names) + idx._set_names(names, level=level) if not inplace: return idx @@ -2218,19 +2251,30 @@ def _verify_integrity(self): def _get_levels(self): return self._levels - def _set_levels(self, levels, copy=False, validate=True, + def _set_levels(self, levels, level=None, copy=False, validate=True, verify_integrity=False): # This is NOT part of the levels property 
because it should be # externally not allowed to set levels. User beware if you change # _levels directly if validate and len(levels) == 0: raise ValueError('Must set non-zero number of levels.') - if validate and len(levels) != len(self._labels): - raise ValueError('Length of levels must match length of labels.') - levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy() - for lev in levels) + if validate and level is None and len(levels) != self.nlevels: + raise ValueError('Length of levels must match number of levels.') + if validate and level is not None and len(levels) != len(level): + raise ValueError('Length of levels must match length of level.') + + if level is None: + new_levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy() + for lev in levels) + else: + level = [self._get_level_number(l) for l in level] + new_levels = list(self._levels) + for l, v in zip(level, levels): + new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy() + new_levels = FrozenList(new_levels) + names = self.names - self._levels = levels + self._levels = new_levels if any(names): self._set_names(names) @@ -2240,15 +2284,17 @@ def _set_levels(self, levels, copy=False, validate=True, if verify_integrity: self._verify_integrity() - def set_levels(self, levels, inplace=False, verify_integrity=True): + def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): """ Set new levels on MultiIndex. Defaults to returning new index. 
Parameters ---------- - levels : sequence - new levels to apply + levels : sequence or list of sequence + new level(s) to apply + level : int or level name, or sequence of int / level names (default None) + level(s) to set (None for all levels) inplace : bool if True, mutates in place verify_integrity : bool (default True) @@ -2257,15 +2303,47 @@ def set_levels(self, levels, inplace=False, verify_integrity=True): Returns ------- new index (of same type and class...etc) - """ - if not com.is_list_like(levels) or not com.is_list_like(levels[0]): - raise TypeError("Levels must be list of lists-like") + + + Examples + -------- + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_levels([['a','b'], [1,2]]) + MultiIndex(levels=[[u'a', u'b'], [1, 2]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels(['a','b'], level=0) + MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels(['a','b'], level='bar') + MultiIndex(levels=[[1, 2], [u'a', u'b']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels([['a','b'], [1,2]], level=[0,1]) + MultiIndex(levels=[[u'a', u'b'], [1, 2]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + """ + if level is not None and not com.is_list_like(level): + if not com.is_list_like(levels): + raise TypeError("Levels must be list-like") + if com.is_list_like(levels[0]): + raise TypeError("Levels must be list-like") + level = [level] + levels = [levels] + elif level is None or com.is_list_like(level): + if not com.is_list_like(levels) or not com.is_list_like(levels[0]): + raise TypeError("Levels must be list of lists-like") + if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() - idx._set_levels(levels, validate=True, + idx._set_levels(levels, level=level, validate=True, 
verify_integrity=verify_integrity) if not inplace: return idx @@ -2280,27 +2358,42 @@ def set_levels(self, levels, inplace=False, verify_integrity=True): def _get_labels(self): return self._labels - def _set_labels(self, labels, copy=False, validate=True, + def _set_labels(self, labels, level=None, copy=False, validate=True, verify_integrity=False): - if validate and len(labels) != self.nlevels: - raise ValueError("Length of labels must match length of levels") - self._labels = FrozenList( - _ensure_frozen(labs, copy=copy)._shallow_copy() for labs in labels) + + if validate and level is None and len(labels) != self.nlevels: + raise ValueError("Length of labels must match number of levels") + if validate and level is not None and len(labels) != len(level): + raise ValueError('Length of labels must match length of levels.') + + if level is None: + new_labels = FrozenList(_ensure_frozen(v, copy=copy)._shallow_copy() + for v in labels) + else: + level = [self._get_level_number(l) for l in level] + new_labels = list(self._labels) + for l, v in zip(level, labels): + new_labels[l] = _ensure_frozen(v, copy=copy)._shallow_copy() + new_labels = FrozenList(new_labels) + + self._labels = new_labels self._tuples = None self._reset_cache() if verify_integrity: self._verify_integrity() - def set_labels(self, labels, inplace=False, verify_integrity=True): + def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): """ Set new labels on MultiIndex. Defaults to returning new index. 
Parameters ---------- - labels : sequence of arrays + labels : sequence or list of sequence new labels to apply + level : int or level name, or sequence of int / level names (default None) + level(s) to set (None for all levels) inplace : bool if True, mutates in place verify_integrity : bool (default True) @@ -2309,15 +2402,46 @@ def set_labels(self, labels, inplace=False, verify_integrity=True): Returns ------- new index (of same type and class...etc) - """ - if not com.is_list_like(labels) or not com.is_list_like(labels[0]): - raise TypeError("Labels must be list of lists-like") + + Examples + -------- + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_labels([[1,0,1,0], [0,0,1,1]]) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([1,0,1,0], level=0) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([0,0,1,1], level='bar') + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([[1,0,1,0], [0,0,1,1]], level=[0,1]) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + """ + if level is not None and not com.is_list_like(level): + if not com.is_list_like(labels): + raise TypeError("Labels must be list-like") + if com.is_list_like(labels[0]): + raise TypeError("Labels must be list-like") + level = [level] + labels = [labels] + elif level is None or com.is_list_like(level): + if not com.is_list_like(labels) or not com.is_list_like(labels[0]): + raise TypeError("Labels must be list of lists-like") + if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() - idx._set_labels(labels, verify_integrity=verify_integrity) + idx._set_labels(labels, level=level, 
verify_integrity=verify_integrity) if not inplace: return idx @@ -2434,18 +2558,30 @@ def __len__(self): def _get_names(self): return FrozenList(level.name for level in self.levels) - def _set_names(self, values, validate=True): + def _set_names(self, names, level=None, validate=True): """ sets names on levels. WARNING: mutates! Note that you generally want to set this *after* changing levels, so - that it only acts on copies""" - values = list(values) - if validate and len(values) != self.nlevels: - raise ValueError('Length of names must match length of levels') + that it only acts on copies + """ + + names = list(names) + + if validate and level is not None and len(names) != len(level): + raise ValueError('Length of names must match length of level.') + if validate and level is None and len(names) != self.nlevels: + raise ValueError( + 'Length of names must match number of levels in MultiIndex.') + + if level is None: + level = range(self.nlevels) + else: + level = [self._get_level_number(l) for l in level] + # set the name - for name, level in zip(values, self.levels): - level.rename(name, inplace=True) + for l, name in zip(level, names): + self.levels[l].rename(name, inplace=True) names = property( fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex") diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index a8486beb57042..8b1f6ce3e7f45 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -70,9 +70,11 @@ def test_set_name_methods(self): self.assertIsNone(res) self.assertEqual(ind.name, new_name) self.assertEqual(ind.names, [new_name]) - with assertRaisesRegexp(TypeError, "list-like"): - # should still fail even if it would be the right length - ind.set_names("a") + #with assertRaisesRegexp(TypeError, "list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with assertRaisesRegexp(ValueError, "Level must be None"): + ind.set_names("a", level=0) # rename in place 
just leaves tuples and other containers alone name = ('A', 'B') ind = self.intIndex @@ -1509,15 +1511,30 @@ def test_set_names_and_rename(self): self.assertIsNone(res) self.assertEqual(ind.names, new_names2) - def test_set_levels_and_set_labels(self): + # set names for specific level (# GH7792) + ind = self.index.set_names(new_names[0], level=0) + self.assertEqual(self.index.names, self.index_names) + self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) + + res = ind.set_names(new_names2[0], level=0, inplace=True) + self.assertIsNone(res) + self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) + + # set names for multiple levels + ind = self.index.set_names(new_names, level=[0, 1]) + self.assertEqual(self.index.names, self.index_names) + self.assertEqual(ind.names, new_names) + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + self.assertIsNone(res) + self.assertEqual(ind.names, new_names2) + + + def test_set_levels(self): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. 
levels, labels = self.index.levels, self.index.labels new_levels = [[lev + 'a' for lev in level] for level in levels] - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] def assert_matching(actual, expected): # avoid specifying internal representation @@ -1539,6 +1556,58 @@ def assert_matching(actual, expected): self.assertIsNone(inplace_return) assert_matching(ind2.levels, new_levels) + # level changing specific level [w/o mutation] + ind2 = self.index.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(self.index.levels, levels) + + ind2 = self.index.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(self.index.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = self.index.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(self.index.levels, levels) + + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(self.index.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + def test_set_labels(self): + # side note - you probably wouldn't want to use levels and labels 
+ # directly like this - but it is possible. + levels, labels = self.index.levels, self.index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + self.assertEqual(len(actual), len(expected)) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + assert_almost_equal(act, exp) + # label changing [w/o mutation] ind2 = self.index.set_labels(new_labels) assert_matching(ind2.labels, new_labels) @@ -1550,6 +1619,40 @@ def assert_matching(actual, expected): self.assertIsNone(inplace_return) assert_matching(ind2.labels, new_labels) + # label changing specific level [w/o mutation] + ind2 = self.index.set_labels(new_labels[0], level=0) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(self.index.labels, labels) + + ind2 = self.index.set_labels(new_labels[1], level=1) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(self.index.labels, labels) + + # label changing multiple levels [w/o mutation] + ind2 = self.index.set_labels(new_labels, level=[0, 1]) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + + # label changing specific level [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(self.index.labels, labels) + + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(self.index.labels, labels) + + # label changing multiple levels [w/ mutation] + ind2 = self.index.copy() + 
inplace_return = ind2.set_labels(new_labels, level=[0, 1], inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + def test_set_levels_labels_names_bad_input(self): levels, labels = self.index.levels, self.index.labels names = self.index.names @@ -1575,6 +1678,27 @@ def test_set_levels_labels_names_bad_input(self): with tm.assertRaisesRegexp(TypeError, 'list-like'): self.index.set_names(names[0]) + # should have equal lengths + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_levels(levels[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_levels(levels, level=0) + + # should have equal lengths + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_labels(labels[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_labels(labels, level=0) + + # should have equal lengths + with tm.assertRaisesRegexp(ValueError, 'Length of names'): + self.index.set_names(names[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'string'): + self.index.set_names(names, level=0) + def test_metadata_immutable(self): levels, labels = self.index.levels, self.index.labels # shouldn't be able to set at either the top level or base level