From e546e5335a13afdb45b954f1072115f9f0aea8b6 Mon Sep 17 00:00:00 2001
From: Phillip Cloud
Date: Sat, 27 Jul 2013 14:04:45 -0400
Subject: [PATCH] BUG: fix period index object instantiation when joining with self

---
 doc/source/release.rst                  |  3 ++
 pandas/tests/test_index.py              | 22 +++++++++++
 pandas/tseries/period.py                | 52 ++++++++++++-------------
 pandas/tseries/tests/test_period.py     |  7 ++++
 pandas/tseries/tests/test_timeseries.py |  6 +++
 5 files changed, 63 insertions(+), 27 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 779ec9852118d..e9af4ccf50dc4 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -89,6 +89,9 @@ pandas 0.13
   - Fixed bug with duplicate columns and type conversion in ``read_json`` when
     ``orient='split'`` (:issue:`4377`)
   - Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`)
+  - Fixed an issue where ``PeriodIndex`` joining with self was returning a new
+    instance rather than the same instance (:issue:`4379`); also adds a test
+    for this for the other index types
 
 pandas 0.12
 ===========
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 200bc5d6611f9..cc069a4da31e3 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -555,6 +555,15 @@ def test_slice_keep_name(self):
         idx = Index(['a', 'b'], name='asdf')
         self.assertEqual(idx.name, idx[1:].name)
 
+    def test_join_self(self):
+        indices = 'unicode', 'str', 'date', 'int', 'float'
+        kinds = 'outer', 'inner', 'left', 'right'
+        for index_kind in indices:
+            for kind in kinds:
+                res = getattr(self, '{0}Index'.format(index_kind))
+                joined = res.join(res, how=kind)
+                self.assert_(res is joined)
+
 
 class TestInt64Index(unittest.TestCase):
     _multiprocess_can_split_ = True
@@ -834,6 +843,12 @@ def test_join_non_unique(self):
         exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64)
         self.assert_(np.array_equal(ridx, exp_ridx))
 
+    def test_join_self(self):
+        kinds = 'outer', 'inner', 'left', 'right'
+        for kind in kinds:
+            joined = self.index.join(self.index, how=kind)
+            self.assert_(self.index is joined)
+
     def test_intersection(self):
         other = Index([1, 2, 3, 4, 5])
         result = self.index.intersection(other)
@@ -1727,6 +1742,13 @@ def _check_all(other):
 
         self.assertRaises(Exception, self.index.join, self.index, level=1)
 
+    def test_join_self(self):
+        kinds = 'outer', 'inner', 'left', 'right'
+        for kind in kinds:
+            res = self.index
+            joined = res.join(res, how=kind)
+            self.assert_(res is joined)
+
     def test_reindex(self):
         result, indexer = self.index.reindex(list(self.index[:4]))
         tm.assert_isinstance(result, MultiIndex)
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index bf1199dc2690f..2dfb6a0d3d723 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -553,11 +553,9 @@ class PeriodIndex(Int64Index):
     __le__ = _period_index_cmp('__le__')
     __ge__ = _period_index_cmp('__ge__')
 
-    def __new__(cls, data=None, ordinal=None,
-                freq=None, start=None, end=None, periods=None,
-                copy=False, name=None,
-                year=None, month=None, quarter=None, day=None,
-                hour=None, minute=None, second=None,
+    def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
+                periods=None, copy=False, name=None, year=None, month=None,
+                quarter=None, day=None, hour=None, minute=None, second=None,
                 tz=None):
 
         freq = _freq_mod.get_standard_freq(freq)
@@ -649,19 +647,18 @@ def _from_arraylike(cls, data, freq, tz):
                     freq = getattr(data[0], 'freq', None)
 
                 if freq is None:
-                    raise ValueError(('freq not specified and cannot be '
-                                      'inferred from first element'))
+                    raise ValueError('freq not specified and cannot be '
+                                     'inferred from first element')
 
-                if np.issubdtype(data.dtype, np.datetime64):
-                    data = dt64arr_to_periodarr(data, freq, tz)
-                elif data.dtype == np.int64:
-                    pass
-                else:
-                    try:
-                        data = com._ensure_int64(data)
-                    except (TypeError, ValueError):
-                        data = com._ensure_object(data)
-                        data = _get_ordinals(data, freq)
+                if data.dtype != np.int64:
+                    if np.issubdtype(data.dtype, np.datetime64):
+                        data = dt64arr_to_periodarr(data, freq, tz)
+                    else:
+                        try:
+                            data = com._ensure_int64(data)
+                        except (TypeError, ValueError):
+                            data = com._ensure_object(data)
+                            data = _get_ordinals(data, freq)
 
         return data, freq
 
@@ -1013,8 +1010,7 @@ def join(self, other, how='left', level=None, return_indexers=False):
         if return_indexers:
             result, lidx, ridx = result
             return self._apply_meta(result), lidx, ridx
-        else:
-            return self._apply_meta(result)
+        return self._apply_meta(result)
 
     def _assert_can_do_setop(self, other):
         if not isinstance(other, PeriodIndex):
@@ -1031,9 +1027,10 @@ def _wrap_union_result(self, other, result):
         return result
 
     def _apply_meta(self, rawarr):
-        idx = rawarr.view(PeriodIndex)
-        idx.freq = self.freq
-        return idx
+        if not isinstance(rawarr, PeriodIndex):
+            rawarr = rawarr.view(PeriodIndex)
+        rawarr.freq = self.freq
+        return rawarr
 
     def __getitem__(self, key):
         """Override numpy.ndarray's __getitem__ method to work as desired"""
@@ -1069,18 +1066,19 @@ def _format_native_types(self, na_rep=u('NaT'), **kwargs):
         return values.tolist()
 
     def __array_finalize__(self, obj):
-        if self.ndim == 0:  # pragma: no cover
+        if not self.ndim:  # pragma: no cover
             return self.item()
 
         self.freq = getattr(obj, 'freq', None)
         self.name = getattr(obj, 'name', None)
 
     def __repr__(self):
-        output = str(self.__class__) + '\n'
-        output += 'freq: ''%s''\n' % self.freq
-        if len(self) > 0:
+        output = com.pprint_thing(self.__class__) + '\n'
+        output += 'freq: %s\n' % self.freq
+        n = len(self)
+        if n:
             output += '[%s, ..., %s]\n' % (self[0], self[-1])
-        output += 'length: %d' % len(self)
+        output += 'length: %d' % n
         return output
 
     def __unicode__(self):
diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py
index a5902ac718fa6..b7916bd98d70f 100644
--- a/pandas/tseries/tests/test_period.py
+++ b/pandas/tseries/tests/test_period.py
@@ -1864,6 +1864,13 @@ def test_joins(self):
             tm.assert_isinstance(joined, PeriodIndex)
             self.assert_(joined.freq == index.freq)
 
+    def test_join_self(self):
+        index = period_range('1/1/2000', '1/20/2000', freq='D')
+
+        for kind in ['inner', 'outer', 'left', 'right']:
+            res = index.join(index, how=kind)
+            self.assert_(index is res)
+
     def test_align_series(self):
         rng = period_range('1/1/2000', '1/1/2010', freq='A')
         ts = Series(np.random.randn(len(rng)), index=rng)
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 0fcdcf344ca38..e0413531d05b4 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -1960,6 +1960,12 @@ def test_slice_keeps_name(self):
         dr = pd.date_range(st, et, freq='H', name='timebucket')
         self.assertEqual(dr[1:].name, dr.name)
 
+    def test_join_self(self):
+        index = date_range('1/1/2000', periods=10)
+        kinds = 'outer', 'inner', 'left', 'right'
+        for kind in kinds:
+            joined = index.join(index, how=kind)
+            self.assert_(index is joined)
 
 class TestLegacySupport(unittest.TestCase):
     _multiprocess_can_split_ = True