Skip to content

Commit f0fbcbb

Browse files
committed
BUG/ENH: cleanup for Timestamp arithmetic
Fixes GH8865 (Timestamp - Timestamp -> Timedelta). This PR cleans up and extends `Timestamp` arithmetic similarly to the treatment for `Timedelta` in GH8884. It includes a new `to_datetime64()` method, and arithmetic now works between Timestamp and ndarrays. I also ensured comparison operations work properly between all of (Timestamp, Timedelta, NaT) and ndarrays. Implementation notes: wide use of the `NotImplemented` singleton let me clean up many of these complex cases. I also strove to reduce the tight coupling of `Timestamp`/`Timedelta` to pandas itself by removing use of the `_typ` property in tslib (I honestly don't quite understand why it needs to exist) and by not treating series/index any differently from any other ndarray-like object.
1 parent ff0756f commit f0fbcbb

File tree

6 files changed

+173
-79
lines changed

6 files changed

+173
-79
lines changed

doc/source/whatsnew/v0.15.2.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ Enhancements
6666
- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`).
6767
- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here<remote_data.ga>`.
6868
- Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here <io.stata-categorical>` for more information on importing categorical variables from Stata data files.
69-
- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`).
70-
- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`).
71-
- Added ``Timedelta.to_timedelta64`` method to the public API (:issue:`8884`).
69+
- ``Timestamp`` and ``Timedelta`` arithmetic and comparisons return ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`, :issue:`TBD`).
70+
- ``Timestamp`` and ``Timedelta`` now support arithmetic and comparisons with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`, :issue:`TBD`).
71+
- Added ``Timestamp.to_datetime64`` and ``Timedelta.to_timedelta64`` methods to the public API (:issue:`8884`, :issue:`TBD`).
7272

7373
.. _whatsnew_0152.performance:
7474

@@ -93,6 +93,7 @@ Bug Fixes
9393
- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`).
9494
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
9595
- Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons (:issue:`8813`, :issue:`5963`, :issue:`5436`).
96+
- The difference of two ``Timestamp`` objects is now a ``pandas.Timedelta`` rather than only a ``datetime.timedelta`` (:issue:`8865`).
9697
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
9798
- ``slice`` string method now takes step into account (:issue:`8754`)
9899
- Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`)

pandas/tseries/base.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def __add__(self, other):
316316
return self._add_delta(other)
317317
elif com.is_integer(other):
318318
return self.shift(other)
319-
elif isinstance(other, (tslib.Timestamp, datetime)):
319+
elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)):
320320
return self._add_datelike(other)
321321
else: # pragma: no cover
322322
return NotImplemented
@@ -339,14 +339,18 @@ def __sub__(self, other):
339339
return self._add_delta(-other)
340340
elif com.is_integer(other):
341341
return self.shift(-other)
342-
elif isinstance(other, (tslib.Timestamp, datetime)):
342+
elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)):
343343
return self._sub_datelike(other)
344344
else: # pragma: no cover
345345
return NotImplemented
346346
cls.__sub__ = __sub__
347347

348348
def __rsub__(self, other):
349-
return -self + other
349+
from pandas.tseries.tdi import TimedeltaIndex
350+
if isinstance(self, TimedeltaIndex):
351+
return -self + other
352+
else:
353+
return -(self - other)
350354
cls.__rsub__ = __rsub__
351355

352356
cls.__iadd__ = __add__

pandas/tseries/tests/test_timedeltas.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import division
44
from datetime import datetime, timedelta, time
55
import nose
6+
import operator
67

78
from distutils.version import LooseVersion
89
import numpy as np
@@ -288,6 +289,30 @@ def test_compare_timedelta_series(self):
288289
expected = pd.Series([False, True])
289290
tm.assert_series_equal(actual, expected)
290291

292+
def test_compare_timedelta_ndarray(self):
293+
lhs = pd.to_timedelta(['1 day', '3 days']).values
294+
rhs = Timedelta('2 day')
295+
296+
nat = Timedelta('nat')
297+
expected_nat = np.array([False, False])
298+
299+
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
300+
'ne': 'ne'}
301+
302+
for left, right in ops.items():
303+
left_f = getattr(operator, left)
304+
right_f = getattr(operator, right)
305+
expected = left_f(lhs, rhs)
306+
307+
result = right_f(rhs, lhs)
308+
self.assert_numpy_array_equal(result, expected)
309+
310+
expected = ~expected_nat if left == 'ne' else expected_nat
311+
result = left_f(lhs, nat)
312+
self.assert_numpy_array_equal(result, expected)
313+
result = right_f(nat, lhs)
314+
self.assert_numpy_array_equal(result, expected)
315+
291316
def test_ops_notimplemented(self):
292317
class Other:
293318
pass
@@ -299,6 +324,8 @@ class Other:
299324
self.assertTrue(td.__truediv__(other) is NotImplemented)
300325
self.assertTrue(td.__mul__(other) is NotImplemented)
301326
self.assertTrue(td.__floordiv__(td) is NotImplemented)
327+
self.assertTrue(td.__lt__(other) is NotImplemented)
328+
self.assertTrue(td.__eq__(other) is NotImplemented)
302329

303330
def test_fields(self):
304331
rng = to_timedelta('1 days, 10:11:12')

pandas/tseries/tests/test_timeseries.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,6 +3680,30 @@ def test_timestamp_compare_series(self):
36803680
result = right_f(Timestamp('nat'), s_nat)
36813681
tm.assert_series_equal(result, expected)
36823682

3683+
def test_timestamp_compare_ndarray(self):
3684+
lhs = pd.to_datetime(['1999-12-31', '2000-01-02']).values
3685+
rhs = Timestamp('2000-01-01')
3686+
3687+
nat = Timestamp('nat')
3688+
expected_nat = np.array([False, False])
3689+
3690+
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
3691+
'ne': 'ne'}
3692+
3693+
for left, right in ops.items():
3694+
left_f = getattr(operator, left)
3695+
right_f = getattr(operator, right)
3696+
expected = left_f(lhs, rhs)
3697+
3698+
result = right_f(rhs, lhs)
3699+
self.assert_numpy_array_equal(result, expected)
3700+
3701+
expected = ~expected_nat if left == 'ne' else expected_nat
3702+
result = left_f(lhs, nat)
3703+
self.assert_numpy_array_equal(result, expected)
3704+
result = right_f(nat, lhs)
3705+
self.assert_numpy_array_equal(result, expected)
3706+
36833707

36843708
class TestSlicing(tm.TestCase):
36853709

pandas/tseries/tests/test_tslib.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1+
from distutils.version import LooseVersion
2+
import datetime
13
import nose
2-
34
import numpy as np
45

56
from pandas import tslib
6-
import datetime
7-
8-
from pandas.core.api import Timestamp, Series
7+
from pandas.core.api import Timestamp, Timedelta, Series
98
from pandas.tslib import period_asfreq, period_ordinal
109
from pandas.tseries.index import date_range
1110
from pandas.tseries.frequencies import get_freq
11+
import pandas as pd
1212
import pandas.tseries.offsets as offsets
1313
import pandas.util.testing as tm
1414
from pandas.util.testing import assert_series_equal
@@ -136,6 +136,20 @@ def test_constructor_with_stringoffset(self):
136136
self.assertEqual(repr(result), expected_repr)
137137
self.assertEqual(result, eval(repr(result)))
138138

139+
def test_conversion(self):
140+
ts = Timestamp('2000-01-01')
141+
142+
result = ts.to_pydatetime()
143+
expected = datetime.datetime(2000, 1, 1)
144+
self.assertEqual(result, expected)
145+
self.assertEqual(type(result), type(expected))
146+
147+
result = ts.to_datetime64()
148+
expected = np.datetime64(ts.value, 'ns')
149+
self.assertEqual(result, expected)
150+
self.assertEqual(type(result), type(expected))
151+
self.assertEqual(result.dtype, expected.dtype)
152+
139153
def test_repr(self):
140154
dates = ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001']
141155
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/America/Los_Angeles']
@@ -232,13 +246,13 @@ def test_tz(self):
232246
conv = local.tz_convert('US/Eastern')
233247
self.assertEqual(conv.nanosecond, 5)
234248
self.assertEqual(conv.hour, 19)
235-
249+
236250
def test_tz_localize_ambiguous(self):
237-
251+
238252
ts = Timestamp('2014-11-02 01:00')
239253
ts_dst = ts.tz_localize('US/Eastern', ambiguous=True)
240254
ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False)
241-
255+
242256
rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern')
243257
self.assertEqual(rng[1], ts_dst)
244258
self.assertEqual(rng[2], ts_no_dst)
@@ -675,8 +689,8 @@ def test_addition_subtraction_types(self):
675689
self.assertEqual(type(timestamp_instance + 1), Timestamp)
676690
self.assertEqual(type(timestamp_instance - 1), Timestamp)
677691

678-
# Timestamp + datetime not supported, though subtraction is supported and yields timedelta
679-
self.assertEqual(type(timestamp_instance - datetime_instance), datetime.timedelta)
692+
# Timestamp + datetime not supported, though subtraction is supported and yields Timedelta
693+
self.assertEqual(type(timestamp_instance - datetime_instance), Timedelta)
680694

681695
self.assertEqual(type(timestamp_instance + timedelta_instance), Timestamp)
682696
self.assertEqual(type(timestamp_instance - timedelta_instance), Timestamp)
@@ -686,6 +700,43 @@ def test_addition_subtraction_types(self):
686700
self.assertEqual(type(timestamp_instance + timedelta64_instance), Timestamp)
687701
self.assertEqual(type(timestamp_instance - timedelta64_instance), Timestamp)
688702

703+
def test_ops_ndarray(self):
704+
ts = Timestamp('2000-01-01')
705+
706+
# timedelta operations
707+
other = pd.to_timedelta(['1 day']).values
708+
expected = pd.to_datetime(['2000-01-02']).values
709+
self.assert_numpy_array_equal(ts + other, expected)
710+
if LooseVersion(np.__version__) >= '1.8':
711+
self.assert_numpy_array_equal(other + ts, expected)
712+
self.assertRaises(TypeError, lambda: ts + np.array([1]))
713+
self.assertRaises(TypeError, lambda: np.array([1]) + ts)
714+
715+
expected = pd.to_datetime(['1999-12-31']).values
716+
self.assert_numpy_array_equal(ts - other, expected)
717+
if LooseVersion(np.__version__) >= '1.8':
718+
self.assert_numpy_array_equal(-other + ts, expected)
719+
self.assertRaises(TypeError, lambda: ts - np.array([1]))
720+
self.assertRaises(TypeError, lambda: np.array([1]) - ts)
721+
722+
# datetime operations
723+
other = pd.to_datetime(['1999-12-31']).values
724+
expected = pd.to_timedelta(['1 days']).values
725+
self.assert_numpy_array_equal(ts - other, expected)
726+
if LooseVersion(np.__version__) >= '1.8':
727+
self.assert_numpy_array_equal(other - ts, -expected)
728+
729+
def test_ops_notimplemented(self):
730+
class Other:
731+
pass
732+
other = Other()
733+
734+
ts = Timestamp('2000-01-01')
735+
self.assertTrue(ts.__add__(other) is NotImplemented)
736+
self.assertTrue(ts.__sub__(other) is NotImplemented)
737+
self.assertTrue(ts.__lt__(other) is NotImplemented)
738+
self.assertTrue(ts.__eq__(other) is NotImplemented)
739+
689740
def test_addition_subtraction_preserve_frequency(self):
690741
timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0]
691742
timedelta_instance = datetime.timedelta(days=1)

0 commit comments

Comments
 (0)