diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 02de919e3f83e..6a14a4024ba5a 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -49,7 +49,7 @@ API changes In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c']) In [4]: cat - Out[4]: + Out[4]: [a, b, a] Categories (3, object): [a < b < c] diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 5e75d9ed011a2..df21e51d100fc 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -23,6 +23,13 @@ Enhancements +- The ``drop`` function now accepts an ``errors`` keyword to suppress the ``ValueError`` raised when any of the labels does not exist in the target data. (:issue:`6736`) + + .. ipython:: python + + df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) + df.drop(['A', 'X'], axis=1, errors='ignore') + .. _whatsnew_0161.api: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 012a73fac1ef4..30a3601a5a4bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1557,7 +1557,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None): return self.reindex(**d) - def drop(self, labels, axis=0, level=None, inplace=False): + def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): """ Return new object with labels in requested axis removed @@ -1569,6 +1569,8 @@ def drop(self, labels, axis=0, level=None, inplace=False): For MultiIndex inplace : bool, default False If True, do operation inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. 
Returns ------- @@ -1582,9 +1584,9 @@ def drop(self, labels, axis=0, level=None, inplace=False): if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') - new_axis = axis.drop(labels, level=level) + new_axis = axis.drop(labels, level=level, errors=errors) else: - new_axis = axis.drop(labels) + new_axis = axis.drop(labels, errors=errors) dropped = self.reindex(**{axis_name: new_axis}) try: dropped.axes[axis_].set_names(axis.names, inplace=True) diff --git a/pandas/core/index.py b/pandas/core/index.py index e335d00551bab..fd11cd7f598c3 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2325,13 +2325,15 @@ def insert(self, loc, item): (_self[:loc], item_idx, _self[loc:])) return Index(idx, name=self.name) - def drop(self, labels): + def drop(self, labels, errors='raise'): """ Make new Index with passed list of labels deleted Parameters ---------- labels : array-like + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. 
Returns ------- @@ -2341,7 +2343,9 @@ def drop(self, labels): indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): - raise ValueError('labels %s not contained in axis' % labels[mask]) + if errors != 'ignore': + raise ValueError('labels %s not contained in axis' % labels[mask]) + indexer = indexer[~mask] return self.delete(indexer) @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) @@ -3847,7 +3851,7 @@ def repeat(self, n): sortorder=self.sortorder, verify_integrity=False) - def drop(self, labels, level=None): + def drop(self, labels, level=None, errors='raise'): """ Make new MultiIndex with passed list of labels deleted @@ -3870,19 +3874,24 @@ def drop(self, labels, level=None): indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): - raise ValueError('labels %s not contained in axis' - % labels[mask]) - return self.delete(indexer) + if errors != 'ignore': + raise ValueError('labels %s not contained in axis' + % labels[mask]) + indexer = indexer[~mask] except Exception: pass inds = [] for label in labels: - loc = self.get_loc(label) - if isinstance(loc, int): - inds.append(loc) - else: - inds.extend(lrange(loc.start, loc.stop)) + try: + loc = self.get_loc(label) + if isinstance(loc, int): + inds.append(loc) + else: + inds.extend(lrange(loc.start, loc.stop)) + except KeyError: + if errors != 'ignore': + raise return self.delete(inds) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index cdda087b27613..04335991bd614 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7409,6 +7409,26 @@ def test_drop_names(self): self.assertEqual(obj.columns.name, 'second') self.assertEqual(list(df.columns), ['d', 'e', 'f']) + self.assertRaises(ValueError, df.drop, ['g']) + self.assertRaises(ValueError, df.drop, ['g'], 1) + + # errors = 'ignore' + dropped = df.drop(['g'], errors='ignore') + expected = Index(['a', 'b', 'c']) + self.assert_index_equal(dropped.index, expected) + + dropped = 
df.drop(['b', 'g'], errors='ignore') + expected = Index(['a', 'c']) + self.assert_index_equal(dropped.index, expected) + + dropped = df.drop(['g'], axis=1, errors='ignore') + expected = Index(['d', 'e', 'f']) + self.assert_index_equal(dropped.columns, expected) + + dropped = df.drop(['d', 'g'], axis=1, errors='ignore') + expected = Index(['e', 'f']) + self.assert_index_equal(dropped.columns, expected) + def test_dropEmptyRows(self): N = len(self.frame.index) mat = randn(N) @@ -7787,6 +7807,19 @@ def test_drop(self): assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.ix[[2], :]) assert_frame_equal(simple.drop([0, 3], axis='index'), simple.ix[[1, 2], :]) + self.assertRaises(ValueError, simple.drop, 5) + self.assertRaises(ValueError, simple.drop, 'C', 1) + self.assertRaises(ValueError, simple.drop, [1, 5]) + self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1) + + # errors = 'ignore' + assert_frame_equal(simple.drop(5, errors='ignore'), simple) + assert_frame_equal(simple.drop([0, 5], errors='ignore'), + simple.ix[[1, 2, 3], :]) + assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple) + assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'), + simple[['B']]) + #non-unique - wheee! 
nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')), columns=['a', 'a', 'b']) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 39db387045f12..61cb337880c00 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1036,20 +1036,43 @@ def check_slice(in_slice, expected): def test_drop(self): n = len(self.strIndex) - dropped = self.strIndex.drop(self.strIndex[lrange(5, 10)]) + drop = self.strIndex[lrange(5, 10)] + dropped = self.strIndex.drop(drop) expected = self.strIndex[lrange(5) + lrange(10, n)] self.assertTrue(dropped.equals(expected)) self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) + self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) + + # errors='ignore' + mixed = drop.tolist() + ['foo'] + dropped = self.strIndex.drop(mixed, errors='ignore') + expected = self.strIndex[lrange(5) + lrange(10, n)] + self.assert_index_equal(dropped, expected) + + dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore') + expected = self.strIndex[lrange(n)] + self.assert_index_equal(dropped, expected) dropped = self.strIndex.drop(self.strIndex[0]) expected = self.strIndex[1:] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) ser = Index([1, 2, 3]) dropped = ser.drop(1) expected = Index([2, 3]) - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + # errors='ignore' + self.assertRaises(ValueError, ser.drop, [3, 4]) + + dropped = ser.drop(4, errors='ignore') + expected = Index([1, 2, 3]) + self.assert_index_equal(dropped, expected) + + dropped = ser.drop([3, 4, 5], errors='ignore') + expected = Index([1, 2]) + self.assert_index_equal(dropped, expected) def test_tuple_union_bug(self): import pandas @@ -3529,21 +3552,50 @@ def test_drop(self): dropped2 = self.index.drop(index) expected = self.index[[0, 2, 3, 5]] - self.assertTrue(dropped.equals(expected)) - self.assertTrue(dropped2.equals(expected)) + 
self.assert_index_equal(dropped, expected) + self.assert_index_equal(dropped2, expected) dropped = self.index.drop(['bar']) expected = self.index[[0, 1, 3, 4, 5]] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop('foo') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([('bar', 'two')]) self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) self.assertRaises(KeyError, self.index.drop, index) + self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) + self.assertRaises(KeyError, self.index.drop, mixed_index) + + # error='ignore' + dropped = self.index.drop(index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(['foo', 'two'], errors='ignore') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) # mixed partial / full drop dropped = self.index.drop(['foo', ('qux', 'one')]) expected = self.index[[2, 3, 5]] - self.assertTrue(dropped.equals(expected)) + self.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ['foo', ('qux', 'one'), 'two'] + self.assertRaises(KeyError, self.index.drop, mixed_index) + dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[2, 3, 5]] + self.assert_index_equal(dropped, expected) def test_droplevel_with_names(self): index = self.index[self.index.get_loc('foo')] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index cab668b3118fd..0fd03cb5804a8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1984,6 +1984,15 @@ 
def check_drop(drop_val, axis_number, aliases, expected): expected = Panel({"One": df}) check_drop('Two', 0, ['items'], expected) + self.assertRaises(ValueError, panel.drop, 'Three') + + # errors = 'ignore' + dropped = panel.drop('Three', errors='ignore') + assert_panel_equal(dropped, panel) + dropped = panel.drop(['Two', 'Three'], errors='ignore') + expected = Panel({"One": df}) + assert_panel_equal(dropped, expected) + # Major exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) expected = Panel({"One": exp_df, "Two": exp_df}) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c021bb1bf2fd6..f044fe540ea24 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1954,6 +1954,14 @@ def test_drop(self): self.assertRaises(ValueError, s.drop, 'bc') self.assertRaises(ValueError, s.drop, ('a',)) + # errors='ignore' + s = Series(range(3),index=list('abc')) + result = s.drop('bc', errors='ignore') + assert_series_equal(result, s) + result = s.drop(['a', 'd'], errors='ignore') + expected = s.ix[1:] + assert_series_equal(result, expected) + # bad axis self.assertRaises(ValueError, s.drop, 'one', axis='columns')