From 48e8fedc09feaab19eb4ffe09d5f82e099a236c8 Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 1 Sep 2013 17:35:01 -0400 Subject: [PATCH] BUG: (GH4726) bug in getting a cross-sectional using iloc/loc with a duplicate items index --- doc/source/release.rst | 4 +++- pandas/core/internals.py | 27 +++++++++++++-------------- pandas/tests/test_indexing.py | 14 ++++++++++++++ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 592d6804a04ee..b3500b4f98b37 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -307,8 +307,10 @@ See :ref:`Internal Refactoring` - Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the rhs (:issue:`4576`) - Fix error/dtype conversion with setitem of ``None`` on ``Series/DataFrame`` (:issue:`4667`) - Fix decoding based on a passed in non-default encoding in ``pd.read_stata`` (:issue:`4626`) - - Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename`` (:issue:`4718`, :issue:`4628`) - Fix ``DataFrame.from_records`` with a plain-vanilla ``ndarray``. (:issue:`4727`) + - Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``, + etc. 
(:issue:`4718`, :issue:`4628`) + - Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices (:issue:`4726`) pandas 0.12 =========== diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4344b2542ffd6..c6af7e27070a9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -72,9 +72,6 @@ def __init__(self, values, items, ref_items, ndim=None, fastpath=False, placemen self.items = _ensure_index(items) self.ref_items = _ensure_index(ref_items) - def _gi(self, arg): - return self.values[arg] - @property def _consolidate_key(self): return (self._can_consolidate, self.dtype.name) @@ -1165,9 +1162,6 @@ def __init__(self, values, items, ref_items, fastpath=False, placement=None, **k super(DatetimeBlock, self).__init__(values, items, ref_items, fastpath=True, placement=placement, **kwargs) - def _gi(self, arg): - return lib.Timestamp(self.values[arg]) - def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) @@ -1200,7 +1194,7 @@ def _try_coerce_result(self, result): if result.dtype == 'i8': result = tslib.array_to_datetime( result.astype(object).ravel()).reshape(result.shape) - elif isinstance(result, np.integer): + elif isinstance(result, (np.integer, np.datetime64)): result = lib.Timestamp(result) return result @@ -1267,10 +1261,9 @@ def set(self, item, value): self.values[loc] = value def get_values(self, dtype=None): + # return object dtype as Timestamps if dtype == object: - flat_i8 = self.values.ravel().view(np.int64) - res = tslib.ints_to_pydatetime(flat_i8) - return res.reshape(self.values.shape) + return lib.map_infer(self.values.ravel(), lib.Timestamp).reshape(self.values.shape) return self.values @@ -2272,7 +2265,8 @@ def xs(self, key, axis=1, copy=True): def fast_2d_xs(self, loc, copy=False): """ - + get a cross sectional for a given location in the + items ; handle dups """ if len(self.blocks) == 1: result = self.blocks[0].values[:, loc] @@ -2284,15 +2278,20 @@ def 
fast_2d_xs(self, loc, copy=False): raise Exception('cannot get view of mixed-type or ' 'non-consolidated DataFrame') - dtype = _interleaved_dtype(self.blocks) - items = self.items + + # non-unique (GH4726) + if not items.is_unique: + return self._interleave(items).ravel() + + # unique + dtype = _interleaved_dtype(self.blocks) n = len(items) result = np.empty(n, dtype=dtype) for blk in self.blocks: for j, item in enumerate(blk.items): i = items.get_loc(item) - result[i] = blk._gi((j, loc)) + result[i] = blk._try_coerce_result(blk.iget((j, loc))) return result diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 76003de65180f..da4e1e98e6b7a 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1215,6 +1215,20 @@ def test_astype_assignment_with_iloc(self): result = df.get_dtype_counts().sort_index() expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index() + def test_dups_loc(self): + + # GH4726 + # dup indexing with iloc/loc + df = DataFrame([[1,2,'foo','bar',Timestamp('20130101')]], + columns=['a','a','a','a','a'],index=[1]) + expected = Series([1,2,'foo','bar',Timestamp('20130101')],index=['a','a','a','a','a']) + + result = df.iloc[0] + assert_series_equal(result,expected) + + result = df.loc[1] + assert_series_equal(result,expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],