Skip to content

BUG: Period/DateTime slicing and setitem fixes (fixes #2782, #2788, #2789) #2785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ def _get_with(self, key):
key = list(key)

if isinstance(key, Index):
key_type = lib.infer_dtype(key.values)
key_type = key.inferred_type
else:
key_type = lib.infer_dtype(key)

Expand Down Expand Up @@ -700,7 +700,7 @@ def _set_with(self, key, value):
key = list(key)

if isinstance(key, Index):
key_type = lib.infer_dtype(key.values)
key_type = key.inferred_type
else:
key_type = lib.infer_dtype(key)

Expand Down
146 changes: 146 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2544,6 +2544,152 @@ def test_asof(self):
d = self.ts.index[0] - datetools.bday
self.assert_(np.isnan(self.ts.asof(d)))

def test_getitem_setitem_datetimeindex(self):
    """Check Series get/set on a tz-aware hourly DatetimeIndex.

    The index holds 50 hourly stamps starting 1990-01-01 00:00 in
    US/Eastern, so position 4 is 04:00 local time and positions 24..47
    cover 1990-01-02.  Each key form (string, naive datetime, index
    element, partial date string, offset-qualified string and tz-aware
    datetime) is expected to address the same elements as the equivalent
    positional access.

    Every setitem check follows the same pattern: overwrite the selection
    with 0, write the original values back through the same kind of key,
    and require the result to equal the untouched series.
    """
    from pytz import timezone as tz
    from pandas import date_range
    N = 50
    # testing with timezone, GH #2785
    rng = date_range('1/1/1990', periods=N, freq='H', tz='US/Eastern')
    ts = Series(np.random.randn(N), index=rng)

    # scalar lookup / assignment with a string key
    result = ts["1990-01-01 04:00:00"]
    expected = ts[4]
    self.assert_(result == expected)

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts[4]
    assert_series_equal(result, ts)

    # label slices include both endpoints, hence positions 4..7
    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    assert_series_equal(result, ts)

    # boolean mask built by comparing the index against strings
    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # repeat all the above with naive datetimes
    result = ts[datetime(1990, 1, 1, 4)]
    expected = ts[4]
    self.assert_(result == expected)

    result = ts.copy()
    result[datetime(1990, 1, 1, 4)] = 0
    result[datetime(1990, 1, 1, 4)] = ts[4]
    assert_series_equal(result, ts)

    result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0
    result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8]
    assert_series_equal(result, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # keys taken from the index itself (scalar element and sub-index)
    result = ts[ts.index[4]]
    expected = ts[4]
    self.assert_(result == expected)

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result[4:8] = ts[4:8]
    assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    assert_series_equal(result, ts)

    # also test Timestamp tz handling, GH #2789
    # both strings below are expected to address position 4
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts[4]
    assert_series_equal(result, ts)

    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts[4]
    assert_series_equal(result, ts)

    # repeat with datetimes
    # tzinfo= is acceptable here: pytz documents it as safe for UTC
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tz('UTC'))] = ts[4]
    assert_series_equal(result, ts)

    # A pytz zone with transitions must be attached via localize();
    # passing it as tzinfo= can select a historical (local mean time)
    # offset instead of CST, so the key would not be 03:00-06:00.
    central_key = tz('US/Central').localize(datetime(1990, 1, 1, 3))
    result = ts.copy()
    result[central_key] = 0
    result[central_key] = ts[4]
    assert_series_equal(result, ts)

def test_getitem_setitem_periodindex(self):
    """Check Series get/set on an hourly PeriodIndex.

    The index holds 50 hourly periods starting 1990-01-01 00:00, so the
    string "1990-01-01 04" is expected to address position 4 and the
    inclusive label slice 04..07 positions 4..7.  Setitem checks overwrite
    the selection with 0, restore the original values and require the
    result to equal the untouched series.
    """
    from pandas import period_range
    N = 50
    rng = period_range('1/1/1990', periods=N, freq='H')
    ts = Series(np.random.randn(N), index=rng)

    # scalar lookup / assignment with a string key
    result = ts["1990-01-01 04"]
    expected = ts[4]
    self.assert_(result == expected)

    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts[4]
    assert_series_equal(result, ts)

    # label slices include both endpoints, hence positions 4..7
    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    assert_series_equal(result, ts)

    # boolean mask built by comparing the index against strings
    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    # GH 2782
    # keys taken from the index itself (scalar element and sub-index)
    result = ts[ts.index[4]]
    expected = ts[4]
    self.assert_(result == expected)

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result[4:8] = ts[4:8]
    assert_series_equal(result, ts)

def test_asof_periodindex(self):
from pandas import period_range, PeriodIndex
# array or list of dates
Expand Down
73 changes: 41 additions & 32 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ def _dt_index_cmp(opname):
def wrapper(self, other):
func = getattr(super(DatetimeIndex, self), opname)
if isinstance(other, datetime):
func = getattr(self, opname)
other = _to_m8(other)
other = _to_m8(other, tz=self.tz)
elif isinstance(other, list):
other = DatetimeIndex(other)
elif isinstance(other, basestring):
other = _to_m8(other, tz=self.tz)
elif not isinstance(other, np.ndarray):
other = _ensure_datetime64(other)
result = func(other)
Expand Down Expand Up @@ -1042,34 +1043,36 @@ def _partial_date_slice(self, reso, parsed):
'time series.')

if reso == 'year':
t1 = Timestamp(datetime(parsed.year, 1, 1))
t2 = Timestamp(datetime(parsed.year, 12, 31))
t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
t2 = Timestamp(datetime(parsed.year, 12, 31), tz=self.tz)
elif reso == 'month':
d = tslib.monthrange(parsed.year, parsed.month)[1]
t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
t2 = Timestamp(datetime(parsed.year, parsed.month, d))
t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
t2 = Timestamp(datetime(parsed.year, parsed.month, d), tz=self.tz)
elif reso == 'quarter':
qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
d = tslib.monthrange(parsed.year, qe)[1] # at end of month
t1 = Timestamp(datetime(parsed.year, parsed.month, 1))
t2 = Timestamp(datetime(parsed.year, qe, d))
t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
t2 = Timestamp(datetime(parsed.year, qe, d), tz=self.tz)
elif reso == 'day' and self._resolution < Resolution.RESO_DAY:
st = datetime(parsed.year, parsed.month, parsed.day)
t1 = Timestamp(st)
t1 = Timestamp(st, tz=self.tz)
t2 = st + offsets.Day()
t2 = Timestamp(Timestamp(t2).value - 1)
t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
elif (reso == 'hour' and
self._resolution < Resolution.RESO_HR):
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour)
t1 = Timestamp(st)
t2 = Timestamp(Timestamp(st + offsets.Hour()).value - 1)
t1 = Timestamp(st, tz=self.tz)
t2 = Timestamp(Timestamp(st + offsets.Hour(),
tz=self.tz).value - 1)
elif (reso == 'minute' and
self._resolution < Resolution.RESO_MIN):
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute)
t1 = Timestamp(st)
t2 = Timestamp(Timestamp(st + offsets.Minute()).value - 1)
t1 = Timestamp(st, tz=self.tz)
t2 = Timestamp(Timestamp(st + offsets.Minute(),
tz=self.tz).value - 1)
else:
raise KeyError

Expand All @@ -1088,10 +1091,14 @@ def get_value(self, series, key):
Fast lookup of value from 1-dimensional ndarray. Only use this if you
know what you're doing
"""
if isinstance(key, datetime):
# needed to localize naive datetimes
stamp = Timestamp(key, tz=self.tz)
return self._engine.get_value(series, stamp)

try:
return Index.get_value(self, series, key)
except KeyError:

try:
loc = self._get_string_slice(key)
return series[loc]
Expand All @@ -1102,14 +1109,11 @@ def get_value(self, series, key):
locs = self.indexer_at_time(key)
return series.take(locs)

if isinstance(key, basestring):
stamp = Timestamp(key, tz=self.tz)
else:
stamp = Timestamp(key)
try:
stamp = Timestamp(key, tz=self.tz)
return self._engine.get_value(series, stamp)
except KeyError:
raise KeyError(stamp)
except (KeyError, ValueError):
raise KeyError(key)

def get_loc(self, key):
"""
Expand All @@ -1119,27 +1123,32 @@ def get_loc(self, key):
-------
loc : int
"""
if isinstance(key, datetime):
# needed to localize naive datetimes
stamp = Timestamp(key, tz=self.tz)
return self._engine.get_loc(stamp)

try:
return self._engine.get_loc(key)
except KeyError:
return Index.get_loc(self, key)
except (KeyError, ValueError):
try:
return self._get_string_slice(key)
except (TypeError, KeyError, ValueError):
pass

if isinstance(key, time):
return self.indexer_at_time(key)

try:
return self._engine.get_loc(Timestamp(key))
stamp = Timestamp(key, tz=self.tz)
return self._engine.get_loc(stamp)
except (KeyError, ValueError):
raise KeyError(key)

def _get_string_slice(self, key):
freq = getattr(self, 'freqstr',
getattr(self, 'inferred_freq', None))
asdt, parsed, reso = parse_time_string(key, freq)
key = asdt
_, parsed, reso = parse_time_string(key, freq)
loc = self._partial_date_slice(reso, parsed)
return loc

Expand Down Expand Up @@ -1250,7 +1259,7 @@ def searchsorted(self, key, side='left'):
if isinstance(key, np.ndarray):
key = np.array(key, dtype=_NS_DTYPE, copy=False)
else:
key = _to_m8(key)
key = _to_m8(key, tz=self.tz)

return self.values.searchsorted(key, side=side)

Expand Down Expand Up @@ -1339,7 +1348,7 @@ def insert(self, loc, item):
new_index : Index
"""
if isinstance(item, datetime):
item = _to_m8(item)
item = _to_m8(item, tz=self.tz)

new_index = np.concatenate((self[:loc].asi8,
[item.view(np.int64)],
Expand Down Expand Up @@ -1613,13 +1622,13 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
freq=freq, tz=tz, normalize=normalize, name=name)


def _to_m8(key):
def _to_m8(key, tz=None):
'''
Timestamp-like => dt64
'''
if not isinstance(key, datetime):
if not isinstance(key, Timestamp):
# this also converts strings
key = Timestamp(key)
key = Timestamp(key, tz=tz)

return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE)

Expand Down
22 changes: 11 additions & 11 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -616,27 +616,26 @@ cdef convert_to_tsobject(object ts, object tz):
if tz is not None:
# sort of a temporary hack
if ts.tzinfo is not None:
if hasattr(tz, 'normalize'):
if (hasattr(tz, 'normalize') and
hasattr(ts.tzinfo, '_utcoffset')):
ts = tz.normalize(ts)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
else: #tzoffset
ts_offset = _get_utcoffset(ts.tzinfo, ts)
obj.value = _pydatetime_to_dts(ts, &obj.dts)
ts_offset = _get_utcoffset(ts.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(ts_offset)
tz_offset = _get_utcoffset(tz, ts)
obj.value += _delta_to_nanoseconds(tz_offset)

pandas_datetime_to_datetimestruct(obj.value,
PANDAS_FR_ns, &obj.dts)
obj.tzinfo = tz
elif not _is_utc(tz):
try:
if (hasattr(tz, 'localize')):
ts = tz.localize(ts)
except AttributeError:
else:
ts = ts.replace(tzinfo=tz)

obj.value = _pydatetime_to_dts(ts, &obj.dts)
offset = _get_utcoffset(ts.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(offset)
obj.tzinfo = ts.tzinfo
else:
# UTC
Expand All @@ -645,9 +644,10 @@ cdef convert_to_tsobject(object ts, object tz):
else:
obj.value = _pydatetime_to_dts(ts, &obj.dts)
obj.tzinfo = ts.tzinfo
if obj.tzinfo is not None and not _is_utc(obj.tzinfo):
offset = _get_utcoffset(obj.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(offset)

if obj.tzinfo is not None and not _is_utc(obj.tzinfo):
offset = _get_utcoffset(obj.tzinfo, ts)
obj.value -= _delta_to_nanoseconds(offset)

if is_timestamp(ts):
obj.value += ts.nanosecond
Expand Down