From aa3819949fb0decbb987b1846afe3dc6ffbb7be3 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 17 Oct 2013 07:26:24 -0400 Subject: [PATCH 1/2] BUG: fixed issue with drop on a non-unique index with Series (GH5248) --- doc/source/release.rst | 1 + pandas/core/generic.py | 2 ++ pandas/core/indexing.py | 4 +++- pandas/tests/test_frame.py | 6 ++++-- pandas/tests/test_series.py | 25 +++++++++++++++++++++++++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index cc9aa4bbc64ff..886a0a62b4068 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -621,6 +621,7 @@ Bug Fixes non-business date. (:issue:`5203`) - Fixed bug in Excel writers where frames with duplicate column names weren't written correctly. (:issue:`5235`) + - Fixed issue with ``drop`` and a non-unique index on Series (:issue:`5248`) pandas 0.12.0 ------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 266253e05ed61..fcbceecf9a19b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1103,6 +1103,8 @@ def drop(self, labels, axis=0, level=None): """ axis_name = self._get_axis_name(axis) axis, axis_ = self._get_axis(axis), axis + if not is_list_like(labels): + labels = [ labels ] if axis.is_unique: if level is not None: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d32bf166ddea1..0bc0afaf255f2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -581,9 +581,11 @@ def _multi_take_opportunity(self, tup): return False # just too complicated - for ax in self.obj._data.axes: + for indexer, ax in zip(tup,self.obj._data.axes): if isinstance(ax, MultiIndex): return False + elif com._is_bool_indexer(indexer): + return False return True diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index d74ea8a5d2ffc..3f5eef8c04f7d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3175,9 +3175,11 @@ def check(result, expected=None): # drop df = DataFrame([[1,5,7.],[1,5,7.],[1,5,7.]],columns=['bar','a','a']) - df = df.drop(['a'],axis=1) + result = df.drop(['a'],axis=1) expected = DataFrame([[1],[1],[1]],columns=['bar']) - check(df,expected) + check(result,expected) + result = df.drop('a',axis=1) + check(result,expected) # describe df = DataFrame([[1,1,1],[2,2,2],[3,3,3]],columns=['bar','a','a'],dtype='float64') diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 645533d5629d2..eb8969db9d15e 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1415,6 +1415,31 @@ def test_mask(self): rs = s.where(cond, np.nan) assert_series_equal(rs, s.mask(~cond)) + def test_drop(self): + + # unique + s = Series([1,2],index=['one','two']) + expected = Series([1],index=['one']) + result = s.drop(['two']) + assert_series_equal(result,expected) + result = s.drop('two') + assert_series_equal(result,expected) + + # non-unique + # GH 5248 + s = Series([1,1,2],index=['one','two','one']) + expected = Series([1,2],index=['one','one']) + result = s.drop(['two']) + assert_series_equal(result,expected) + result = s.drop('two') + assert_series_equal(result,expected) + + expected = Series([1],index=['two']) + result = s.drop(['one']) + assert_series_equal(result,expected) + result = s.drop('one') + assert_series_equal(result,expected) + def test_ix_setitem(self): inds = self.series.index[[3, 4, 7]] From ea4a09ee5b5502ddb95df0552289e24afac64634 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 17 Oct 2013 07:44:39 -0400 Subject: [PATCH 2/2] CLN: remove reindex_like from core/frame.py (use core/generic.py version) --- pandas/core/frame.py | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfc086b09730e..d2d5776c4a67d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -672,9 +672,9 @@ def to_dict(self, outtype='dict'): raise ValueError("outtype %s not understood" % outtype) def to_gbq(self, destination_table, schema=None, col_order=None, if_exists='fail', **kwargs): - """Write a DataFrame to a Google BigQuery table. - - If the table exists, the DataFrame will be appended. If not, a new table + """Write a DataFrame to a Google BigQuery table. + + If the table exists, the DataFrame will be appended. If not, a new table will be created, in which case the schema will have to be specified. By default, rows will be written in the order they appear in the DataFrame, though the user may specify an alternative order. @@ -2233,33 +2233,6 @@ def rename(self, index=None, columns=None, **kwargs): return super(DataFrame, self).rename(index=index, columns=columns, **kwargs) - def reindex_like(self, other, method=None, copy=True, limit=None, - fill_value=NA): - """ - Reindex DataFrame to match indices of another DataFrame, optionally - with filling logic - - Parameters - ---------- - other : DataFrame - method : string or None - copy : boolean, default True - limit : int, default None - Maximum size gap to forward or backward fill - - Notes - ----- - Like calling s.reindex(index=other.index, columns=other.columns, - method=...) - - Returns - ------- - reindexed : DataFrame - """ - return self.reindex(index=other.index, columns=other.columns, - method=method, copy=copy, limit=limit, - fill_value=fill_value) - def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """