From 3b7bd98868b0a561c3e1c2518e215c81bb62c278 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Thu, 31 Jan 2013 17:19:50 -0500 Subject: [PATCH 1/4] BUG: Period slicing with period_range returns error --- pandas/core/series.py | 4 ++-- pandas/tests/test_series.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b9c5bf33588d1..a54772e8c37db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -554,7 +554,7 @@ def _get_with(self, key): key = list(key) if isinstance(key, Index): - key_type = lib.infer_dtype(key.values) + key_type = key.inferred_type else: key_type = lib.infer_dtype(key) @@ -700,7 +700,7 @@ def _set_with(self, key, value): key = list(key) if isinstance(key, Index): - key_type = lib.infer_dtype(key.values) + key_type = key.inferred_type else: key_type = lib.infer_dtype(key) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 896c7dc34901f..111ae88cd4121 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2544,6 +2544,38 @@ def test_asof(self): d = self.ts.index[0] - datetools.bday self.assert_(np.isnan(self.ts.asof(d))) + def test_getitem_setitem_periodindex(self): + from pandas import period_range, Period + # array or list or dates + N = 50 + rng = period_range('1/1/1990', periods=N, freq='H') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + def test_asof_periodindex(self): from pandas import period_range, PeriodIndex # array or list or dates From 64b47e5b059fd880ab5a669f83079dbe114d053a Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Thu, 31 Jan 2013 21:53:46 -0500 Subject: [PATCH 2/4] BUG: Slicing and setitem inconsistent with getitem using DateTimeIndex with timezone --- pandas/tests/test_series.py | 68 ++++++++++++++++++++++++++++++++++++- pandas/tseries/index.py | 48 ++++++++++++++------------ 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 111ae88cd4121..e3be6977de67a 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2544,13 +2544,75 @@ def test_asof(self): d = self.ts.index[0] - datetools.bday self.assert_(np.isnan(self.ts.asof(d))) + def test_getitem_setitem_datetimeindex(self): + from pandas import date_range + N = 50 + # testing with timezone, GH #2785 + rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern') + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + self.assert_(result == expected) + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + self.assert_(result == expected) + + result = ts[ts.index[4:8]] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected = ts[24:48] + assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + assert_series_equal(result, ts) + def test_getitem_setitem_periodindex(self): from pandas import period_range, Period - # array or list or dates N = 50 rng = period_range('1/1/1990', periods=N, freq='H') ts = Series(np.random.randn(N), index=rng) + result = ts["1990-01-01 04"] + expected = ts[4] + self.assert_(result == expected) + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + assert_series_equal(result, ts) + result = ts["1990-01-01 04":"1990-01-01 07"] expected = ts[4:8] assert_series_equal(result, expected) @@ -2567,6 +2629,10 @@ def test_getitem_setitem_periodindex(self): assert_series_equal(result, expected) # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + self.assert_(result == expected) + result = ts[ts.index[4:8]] expected = ts[4:8] assert_series_equal(result, expected) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index e45c21084e45f..8d584f8c918bc 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -71,6 +71,8 @@ def wrapper(self, other): other = _to_m8(other) elif isinstance(other, list): other = DatetimeIndex(other) + elif isinstance(other, basestring): + other = _to_m8(Timestamp(other, tz=self.tz)) elif not isinstance(other, np.ndarray): other = _ensure_datetime64(other) result = func(other) @@ -1042,34 +1044,36 @@ def _partial_date_slice(self, reso, parsed): 'time series.') if reso == 'year': - t1 = Timestamp(datetime(parsed.year, 1, 1)) - t2 = Timestamp(datetime(parsed.year, 12, 31)) + t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz) + t2 = Timestamp(datetime(parsed.year, 12, 31), tz=self.tz) elif reso == 'month': d = tslib.monthrange(parsed.year, parsed.month)[1] - t1 = Timestamp(datetime(parsed.year, parsed.month, 1)) - t2 = Timestamp(datetime(parsed.year, parsed.month, d)) + t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz) + t2 = Timestamp(datetime(parsed.year, parsed.month, d), tz=self.tz) elif reso == 'quarter': qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead d = tslib.monthrange(parsed.year, qe)[1] # at end of month - t1 = Timestamp(datetime(parsed.year, parsed.month, 1)) - t2 = Timestamp(datetime(parsed.year, qe, d)) + t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz) + t2 = Timestamp(datetime(parsed.year, qe, d), tz=self.tz) elif reso == 'day' and self._resolution < Resolution.RESO_DAY: st = datetime(parsed.year, parsed.month, parsed.day) - t1 = Timestamp(st) + t1 = Timestamp(st, tz=self.tz) t2 = st + offsets.Day() - t2 = Timestamp(Timestamp(t2).value - 1) + t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1) elif (reso == 'hour' and self._resolution < Resolution.RESO_HR): st = datetime(parsed.year, parsed.month, parsed.day, hour=parsed.hour) - t1 = Timestamp(st) - t2 = Timestamp(Timestamp(st + offsets.Hour()).value - 1) + t1 = Timestamp(st, tz=self.tz) + t2 = Timestamp(Timestamp(st + offsets.Hour(), + tz=self.tz).value - 1) elif (reso == 'minute' and self._resolution < Resolution.RESO_MIN): st = datetime(parsed.year, parsed.month, parsed.day, hour=parsed.hour, minute=parsed.minute) - t1 = Timestamp(st) - t2 = Timestamp(Timestamp(st + offsets.Minute()).value - 1) + t1 = Timestamp(st, tz=self.tz) + t2 = Timestamp(Timestamp(st + offsets.Minute(), + tz=self.tz).value - 1) else: raise KeyError @@ -1091,7 +1095,6 @@ def get_value(self, series, key): try: return Index.get_value(self, series, key) except KeyError: - try: loc = self._get_string_slice(key) return series[loc] @@ -1102,11 +1105,11 @@ def get_value(self, series, key): locs = self.indexer_at_time(key) return series.take(locs) - if isinstance(key, basestring): - stamp = Timestamp(key, tz=self.tz) - else: - stamp = Timestamp(key) try: + if isinstance(key, basestring): + stamp = Timestamp(key, tz=self.tz) + else: + stamp = Timestamp(key) return self._engine.get_value(series, stamp) except KeyError: raise KeyError(stamp) @@ -1131,15 +1134,18 @@ def get_loc(self, key): return self.indexer_at_time(key) try: - return self._engine.get_loc(Timestamp(key)) + if isinstance(key, basestring): + stamp = Timestamp(key, tz=self.tz) + else: + stamp = Timestamp(key) + return self._engine.get_loc(stamp) except (KeyError, ValueError): raise KeyError(key) def _get_string_slice(self, key): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) - asdt, parsed, reso = parse_time_string(key, freq) - key = asdt + _, parsed, reso = parse_time_string(key, freq) loc = self._partial_date_slice(reso, parsed) return loc @@ -1617,7 +1623,7 @@ def _to_m8(key): ''' Timestamp-like => dt64 ''' - if not isinstance(key, datetime): + if not isinstance(key, (Timestamp, datetime)): # this also converts strings key = Timestamp(key) From 20fe6493356cefaafc06cb7cb9966457c0a81eb0 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Fri, 1 Feb 2013 02:18:35 -0500 Subject: [PATCH 3/4] BUG: Timestamp constructor not handling timezone conversions correctly --- pandas/tests/test_series.py | 22 ++++++++++++++++++++++ pandas/tslib.pyx | 22 +++++++++++----------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index e3be6977de67a..9d8108819455c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2545,6 +2545,7 @@ def test_asof(self): self.assert_(np.isnan(self.ts.asof(d))) def test_getitem_setitem_datetimeindex(self): + from pytz import timezone as tz from pandas import date_range N = 50 # testing with timezone, GH #2785 @@ -2598,6 +2599,27 @@ def test_getitem_setitem_datetimeindex(self): result["1990-01-02"] = ts[24:48] assert_series_equal(result, ts) + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 9, 0, 0, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, 0, 0, tzinfo=tz('UTC'))] = ts[4] + assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 3, 0, 0, tzinfo=tz('US/Central'))] = 0 + result[datetime(1990, 1, 1, 3, 0, 0, tzinfo=tz('US/Central'))] = ts[4] + assert_series_equal(result, ts) + def test_getitem_setitem_periodindex(self): from pandas import period_range, Period N = 50 diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index bbbe090225b83..c0b500afef5ba 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -616,27 +616,26 @@ cdef convert_to_tsobject(object ts, object tz): if tz is not None: # sort of a temporary hack if ts.tzinfo is not None: - if hasattr(tz, 'normalize'): + if (hasattr(tz, 'normalize') and + hasattr(ts.tzinfo, '_utcoffset')): ts = tz.normalize(ts) obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo else: #tzoffset - ts_offset = _get_utcoffset(ts.tzinfo, ts) obj.value = _pydatetime_to_dts(ts, &obj.dts) + ts_offset = _get_utcoffset(ts.tzinfo, ts) obj.value -= _delta_to_nanoseconds(ts_offset) tz_offset = _get_utcoffset(tz, ts) obj.value += _delta_to_nanoseconds(tz_offset) - + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz elif not _is_utc(tz): - try: + if (hasattr(tz, 'localize')): ts = tz.localize(ts) - except AttributeError: + else: ts = ts.replace(tzinfo=tz) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - offset = _get_utcoffset(ts.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) obj.tzinfo = ts.tzinfo else: # UTC @@ -645,9 +644,10 @@ cdef convert_to_tsobject(object ts, object tz): else: obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo - if obj.tzinfo is not None and not _is_utc(obj.tzinfo): - offset = _get_utcoffset(obj.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) + + if obj.tzinfo is not None and not _is_utc(obj.tzinfo): + offset = _get_utcoffset(obj.tzinfo, ts) + obj.value -= _delta_to_nanoseconds(offset) if is_timestamp(ts): obj.value += ts.nanosecond From d7ca0b38aef8d440b1a42e4ca95f32f4dae84719 Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Fri, 1 Feb 2013 12:26:09 -0500 Subject: [PATCH 4/4] BUG: naive datetime keys not localized to DateTimeIndex tz --- pandas/tests/test_series.py | 34 ++++++++++++++++++++++++---- pandas/tseries/index.py | 45 ++++++++++++++++++++----------------- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 9d8108819455c..357cef72a329b 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2576,6 +2576,31 @@ def test_getitem_setitem_datetimeindex(self): expected = ts[4:8] assert_series_equal(result, expected) + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + self.assert_(result == expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] + expected = ts[4:8] + assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8] + assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + assert_series_equal(result, expected) + result = ts[ts.index[4]] expected = ts[4] self.assert_(result == expected) @@ -2610,14 +2635,15 @@ def test_getitem_setitem_datetimeindex(self): result["1990-01-01 03:00:00-06:00"] = ts[4] assert_series_equal(result, ts) + # repeat with datetimes result = ts.copy() - result[datetime(1990, 1, 1, 9, 0, 0, tzinfo=tz('UTC'))] = 0 - result[datetime(1990, 1, 1, 9, 0, 0, tzinfo=tz('UTC'))] = ts[4] + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4] assert_series_equal(result, ts) result = ts.copy() - result[datetime(1990, 1, 1, 3, 0, 0, tzinfo=tz('US/Central'))] = 0 - result[datetime(1990, 1, 1, 3, 0, 0, tzinfo=tz('US/Central'))] = ts[4] + result[datetime(1990, 1, 1, 3, tzinfo=tz('US/Central'))] = 0 + result[datetime(1990, 1, 1, 3, tzinfo=tz('US/Central'))] = ts[4] assert_series_equal(result, ts) def test_getitem_setitem_periodindex(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8d584f8c918bc..cf9c77f0cfb1a 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -67,12 +67,11 @@ def _dt_index_cmp(opname): def wrapper(self, other): func = getattr(super(DatetimeIndex, self), opname) if isinstance(other, datetime): - func = getattr(self, opname) - other = _to_m8(other) + other = _to_m8(other, tz=self.tz) elif isinstance(other, list): other = DatetimeIndex(other) elif isinstance(other, basestring): - other = _to_m8(Timestamp(other, tz=self.tz)) + other = _to_m8(other, tz=self.tz) elif not isinstance(other, np.ndarray): other = _ensure_datetime64(other) result = func(other) @@ -1092,6 +1091,11 @@ def get_value(self, series, key): Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ + if isinstance(key, datetime): + # needed to localize naive datetimes + stamp = Timestamp(key, tz=self.tz) + return self._engine.get_value(series, stamp) + try: return Index.get_value(self, series, key) except KeyError: @@ -1106,13 +1110,10 @@ def get_value(self, series, key): return series.take(locs) try: - if isinstance(key, basestring): - stamp = Timestamp(key, tz=self.tz) - else: - stamp = Timestamp(key) + stamp = Timestamp(key, tz=self.tz) return self._engine.get_value(series, stamp) - except KeyError: - raise KeyError(stamp) + except (KeyError, ValueError): + raise KeyError(key) def get_loc(self, key): """ @@ -1122,9 +1123,14 @@ def get_loc(self, key): ------- loc : int """ + if isinstance(key, datetime): + # needed to localize naive datetimes + stamp = Timestamp(key, tz=self.tz) + return self._engine.get_loc(stamp) + try: - return self._engine.get_loc(key) - except KeyError: + return Index.get_loc(self, key) + except (KeyError, ValueError): try: return self._get_string_slice(key) except (TypeError, KeyError, ValueError): @@ -1132,12 +1138,9 @@ def get_loc(self, key): if isinstance(key, time): return self.indexer_at_time(key) - + try: - if isinstance(key, basestring): - stamp = Timestamp(key, tz=self.tz) - else: - stamp = Timestamp(key) + stamp = Timestamp(key, tz=self.tz) return self._engine.get_loc(stamp) except (KeyError, ValueError): raise KeyError(key) @@ -1256,7 +1259,7 @@ def searchsorted(self, key, side='left'): if isinstance(key, np.ndarray): key = np.array(key, dtype=_NS_DTYPE, copy=False) else: - key = _to_m8(key) + key = _to_m8(key, tz=self.tz) return self.values.searchsorted(key, side=side) @@ -1345,7 +1348,7 @@ def insert(self, loc, item): new_index : Index """ if isinstance(item, datetime): - item = _to_m8(item) + item = _to_m8(item, tz=self.tz) new_index = np.concatenate((self[:loc].asi8, [item.view(np.int64)], @@ -1619,13 +1622,13 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, freq=freq, tz=tz, normalize=normalize, name=name) -def _to_m8(key): +def _to_m8(key, tz=None): ''' Timestamp-like => dt64 ''' - if not isinstance(key, (Timestamp, datetime)): + if not isinstance(key, Timestamp): # this also converts strings - key = Timestamp(key) + key = Timestamp(key, tz=tz) return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE)