From 274ceea44d1deb9ceff32896c1acbd6af303bdec Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 1 Mar 2013 09:05:07 -0500 Subject: [PATCH 1/2] BUG: negative timedeltas not printing correctly ENH: timedelta ops with other timedelta fixed to produce timedeltas BUG: fixed timedelta (in nanops.py) to work with min/max....abs still not working --- RELEASE.rst | 6 ++-- doc/source/timeseries.rst | 16 +++++++++ pandas/core/common.py | 7 ++++ pandas/core/nanops.py | 55 +++++++++++++++++++----------- pandas/core/series.py | 37 +++++++++++--------- pandas/src/inference.pyx | 1 + pandas/tests/test_format.py | 45 ++++++++++++++++++++---- pandas/tests/test_frame.py | 47 +++++++++++++++++++++++++ pandas/tests/test_series.py | 68 ++++++++++++++++++++++++++++++++----- pandas/tslib.pyx | 53 +++++++++++++++++++++-------- 10 files changed, 268 insertions(+), 67 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index e41731131d888..e742008b71831 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -115,10 +115,12 @@ pandas 0.11.0 - Series ops with a Timestamp on the rhs was throwing an exception (GH2898_) added tests for Series ops with datetimes,timedeltas,Timestamps, and datelike Series on both lhs and rhs - - Series will now set its dtype automatically to ``timedelta64[ns]`` - if all passed objects are timedelta objects + - Fixed subtle timedelta64 inference issue on py3 + - Fixed some formatting issues on timedelta when negative - Support null checking on timedelta64, representing (and formatting) with NaT - Support setitem with np.nan value, converts to NaT + - Support min/max ops in a DataFrame (abs is not working yet, and non-supported ops do not raise an error) + - Support idxmin/idxmax in a Series (only when no NaT values are present) .. _GH622: https://github.com/pydata/pandas/issues/622 .. 
_GH797: https://github.com/pydata/pandas/issues/797 diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index d627212c6ae9c..78dd5cee9c8f9 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -961,3 +961,19 @@ Operands can also appear in a reversed order (a singluar object operated with a s.max() - s datetime(2011,1,1,3,5) - s timedelta(minutes=5) + s + +Some numeric-like operations on timedeltas are supported. + +.. ipython:: python + + s = Series(date_range('2012-1-1', periods=3, freq='D')) + df = DataFrame(dict(A = s - Timestamp('20120101')-timedelta(minutes=5,seconds=5), + B = s - Series(date_range('2012-1-2', periods=3, freq='D')))) + df + + # timedelta arithmetic + td - timedelta(minutes=5,seconds=5,microseconds=5) + + # min/max operations + df.min() + df.min(axis=1) diff --git a/pandas/core/common.py b/pandas/core/common.py index 4e6215969e7ec..90b31b102fb2f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -904,6 +904,13 @@ def _possibly_convert_platform(values): def _possibly_cast_to_timedelta(value): """ try to cast to timedelta64 w/o coercion """ + + # deal with numpy not being able to handle certain timedelta operations + if isinstance(value,np.ndarray) and value.dtype.kind == 'm': + if value.dtype != 'timedelta64[ns]': + value = value.astype('timedelta64[ns]') + return value + new_value = tslib.array_to_timedelta64(value.astype(object), coerce=False) if new_value.dtype == 'i8': value = np.array(new_value,dtype='timedelta64[ns]') diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 000320027e0e4..8ffcc92fd0b9b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -55,7 +55,7 @@ def f(values, axis=None, skipna=True, **kwds): def _bn_ok_dtype(dt): # Bottleneck chokes on datetime64 - return dt != np.object_ and not issubclass(dt.type, np.datetime64) + return dt != np.object_ and not issubclass(dt.type, (np.datetime64,np.timedelta64)) def _has_infs(result): @@ -69,6 +69,32 @@ def 
_has_infs(result): else: return np.isinf(result) or np.isneginf(result) +def _isfinite(values): + if issubclass(values.dtype.type, np.timedelta64): + return isnull(values) + return -np.isfinite(values) + +def _na_ok_dtype(dtype): + return not issubclass(dtype.type, (np.integer, np.datetime64, np.timedelta64)) + +def _needs_view_dtype(dtype): + return issubclass(dtype.type, (np.datetime64,np.timedelta64)) + +def _wrap_results(result,dtype): + """ wrap our results if needed """ + + if issubclass(dtype.type, np.datetime64): + if not isinstance(result, np.ndarray): + result = lib.Timestamp(result) + else: + result = result.view(dtype) + elif issubclass(dtype.type, np.timedelta64): + if not isinstance(result, np.ndarray): + pass + else: + result = result.view(dtype) + + return result def nanany(values, axis=None, skipna=True): mask = isnull(values) @@ -162,12 +188,11 @@ def _nanmin(values, axis=None, skipna=True): dtype = values.dtype - if skipna and not issubclass(dtype.type, - (np.integer, np.datetime64)): + if skipna and _na_ok_dtype(dtype): values = values.copy() np.putmask(values, mask, np.inf) - if issubclass(dtype.type, np.datetime64): + if _needs_view_dtype(dtype): values = values.view(np.int64) # numpy 1.6.1 workaround in Python 3.x @@ -187,12 +212,7 @@ def _nanmin(values, axis=None, skipna=True): else: result = values.min(axis) - if issubclass(dtype.type, np.datetime64): - if not isinstance(result, np.ndarray): - result = lib.Timestamp(result) - else: - result = result.view(dtype) - + result = _wrap_results(result,dtype) return _maybe_null_out(result, axis, mask) @@ -201,11 +221,11 @@ def _nanmax(values, axis=None, skipna=True): dtype = values.dtype - if skipna and not issubclass(dtype.type, (np.integer, np.datetime64)): + if skipna and _na_ok_dtype(dtype): values = values.copy() np.putmask(values, mask, -np.inf) - if issubclass(dtype.type, np.datetime64): + if _needs_view_dtype(dtype): values = values.view(np.int64) # numpy 1.6.1 workaround in Python 3.x @@ 
-226,12 +246,7 @@ def _nanmax(values, axis=None, skipna=True): else: result = values.max(axis) - if issubclass(dtype.type, np.datetime64): - if not isinstance(result, np.ndarray): - result = lib.Timestamp(result) - else: - result = result.view(dtype) - + result = _wrap_results(result,dtype) return _maybe_null_out(result, axis, mask) @@ -239,7 +254,7 @@ def nanargmax(values, axis=None, skipna=True): """ Returns -1 in the NA case """ - mask = -np.isfinite(values) + mask = _isfinite(values) if not issubclass(values.dtype.type, np.integer): values = values.copy() np.putmask(values, mask, -np.inf) @@ -252,7 +267,7 @@ def nanargmin(values, axis=None, skipna=True): """ Returns -1 in the NA case """ - mask = -np.isfinite(values) + mask = _isfinite(values) if not issubclass(values.dtype.type, np.integer): values = values.copy() np.putmask(values, mask, np.inf) diff --git a/pandas/core/series.py b/pandas/core/series.py index f34028482faec..ca08c5e131146 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -81,10 +81,10 @@ def wrapper(self, other): lvalues, rvalues = self, other - is_timedelta = com.is_timedelta64_dtype(self) - is_datetime = com.is_datetime64_dtype(self) + is_timedelta_lhs = com.is_timedelta64_dtype(self) + is_datetime_lhs = com.is_datetime64_dtype(self) - if is_datetime or is_timedelta: + if is_datetime_lhs or is_timedelta_lhs: # convert the argument to an ndarray def convert_to_array(values): @@ -96,26 +96,27 @@ def convert_to_array(values): pass else: values = tslib.array_to_datetime(values) + elif inferred_type in set(['timedelta','timedelta64']): + # need to convert timedelta to ns here + # safest to convert it to an object arrany to process + if isinstance(values, pa.Array) and com.is_timedelta64_dtype(values): + pass + else: + values = com._possibly_cast_to_timedelta(values) else: values = pa.array(values) return values - # swap the valuesor com.is_timedelta64_dtype(self): - if is_timedelta: - lvalues, rvalues = rvalues, lvalues - lvalues 
= convert_to_array(lvalues) - is_timedelta = False - + # convert lhs and rhs + lvalues = convert_to_array(lvalues) rvalues = convert_to_array(rvalues) - # rhs is either a timedelta or a series/ndarray - if lib.is_timedelta_or_timedelta64_array(rvalues): + is_timedelta_rhs = com.is_timedelta64_dtype(rvalues) + is_datetime_rhs = com.is_datetime64_dtype(rvalues) - # need to convert timedelta to ns here - # safest to convert it to an object arrany to process - rvalues = tslib.array_to_timedelta64(rvalues.astype(object)) - dtype = 'M8[ns]' - elif com.is_datetime64_dtype(rvalues): + # 2 datetimes or 2 timedeltas + if (is_timedelta_lhs and is_timedelta_rhs) or (is_datetime_lhs and is_datetime_rhs): + dtype = 'timedelta64[ns]' # we may have to convert to object unfortunately here @@ -126,6 +127,10 @@ def wrap_results(x): np.putmask(x,mask,tslib.iNaT) return x + # datetime and timedelta + elif (is_timedelta_lhs and is_datetime_rhs) or (is_timedelta_rhs and is_datetime_lhs): + dtype = 'M8[ns]' + else: raise ValueError('cannot operate on a series with out a rhs ' 'of a series/ndarray of type datetime64[ns] ' diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 2f84dd416100e..095968494fb57 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -24,6 +24,7 @@ try: _TYPE_MAP[np.complex256] = 'complex' _TYPE_MAP[np.float16] = 'floating' _TYPE_MAP[np.datetime64] = 'datetime64' + _TYPE_MAP[np.timedelta64] = 'timedelta64' except AttributeError: pass diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 739bea41256df..80b2009465209 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1209,24 +1209,57 @@ def test_float_trim_zeros(self): def test_timedelta64(self): from pandas import date_range - from datetime import datetime + from datetime import datetime, timedelta Series(np.array([1100, 20], dtype='timedelta64[s]')).to_string() - # check this works + + s = Series(date_range('2012-1-1', periods=3, 
freq='D')) + # GH2146 # adding NaTs - s = Series(date_range('2012-1-1', periods=3, freq='D')) y = s-s.shift(1) result = y.to_string() self.assertTrue('1 days, 00:00:00' in result) self.assertTrue('NaT' in result) # with frac seconds - s = Series(date_range('2012-1-1', periods=3, freq='D')) - y = s-datetime(2012,1,1,microsecond=150) + o = Series([datetime(2012,1,1,microsecond=150)]*3) + y = s-o + result = y.to_string() + self.assertTrue('-00:00:00.000150' in result) + + # rounding? + o = Series([datetime(2012,1,1,1)]*3) + y = s-o + result = y.to_string() + self.assertTrue('-01:00:00' in result) + self.assertTrue('1 days, 23:00:00' in result) + + o = Series([datetime(2012,1,1,1,1)]*3) + y = s-o + result = y.to_string() + self.assertTrue('-01:01:00' in result) + self.assertTrue('1 days, 22:59:00' in result) + + o = Series([datetime(2012,1,1,1,1,microsecond=150)]*3) + y = s-o + result = y.to_string() + self.assertTrue('-01:01:00.000150' in result) + self.assertTrue('1 days, 22:58:59.999850' in result) + + # neg time + td = timedelta(minutes=5,seconds=3) + s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td + y = s - s2 + result = y.to_string() + self.assertTrue('-00:05:03' in result) + + td = timedelta(microseconds=550) + s2 = Series(date_range('2012-1-1', periods=3, freq='D')) + td + y = s - td result = y.to_string() - self.assertTrue('00:00:00.000150' in result) + self.assertTrue('2012-01-01 23:59:59.999450' in result) def test_mixed_datetime64(self): df = DataFrame({'A': [1, 2], diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 81fbc0fc4d84d..9329bb1da2b07 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2884,6 +2884,53 @@ def test_timedeltas(self): expected.sort() assert_series_equal(result, expected) + def test_operators_timedelta64(self): + + from pandas import date_range + from datetime import datetime, timedelta + df = DataFrame(dict(A = date_range('2012-1-1', periods=3, freq='D'), + B = 
date_range('2012-1-2', periods=3, freq='D'), + C = Timestamp('20120101')-timedelta(minutes=5,seconds=5))) + + diffs = DataFrame(dict(A = df['A']-df['C'], + B = df['A']-df['B'])) + + + # min + result = diffs.min() + self.assert_(result[0] == diffs.ix[0,'A']) + self.assert_(result[1] == diffs.ix[0,'B']) + + result = diffs.min(axis=1) + self.assert_((result == diffs.ix[0,'B']).all() == True) + + # max + result = diffs.max() + self.assert_(result[0] == diffs.ix[2,'A']) + self.assert_(result[1] == diffs.ix[2,'B']) + + result = diffs.max(axis=1) + self.assert_((result == diffs['A']).all() == True) + + # abs ###### THIS IS BROKEN NOW ###### (results are dtype=timedelta64[us] + result = np.abs(df['A']-df['B']) + result = diffs.abs() + expected = DataFrame(dict(A = df['A']-df['C'], + B = df['B']-df['A'])) + #assert_frame_equal(result,expected) + + # mixed frame + mixed = diffs.copy() + mixed['C'] = 'foo' + mixed['D'] = 1 + mixed['E'] = 1. + + # this is ok + result = mixed.min() + + # this is not + result = mixed.min(axis=1) + def test_new_empty_index(self): df1 = DataFrame(randn(0, 3)) df2 = DataFrame(randn(0, 3)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index fdaede9a2949c..46310cce3160d 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1740,33 +1740,83 @@ def test_operators_timedelta64(self): # datetimes on rhs result = df['A'] - datetime(2001,1,1) - self.assert_(result.dtype=='timedelta64[ns]') + expected = Series([timedelta(days=4017+i) for i in range(3)]) + assert_series_equal(result,expected) + self.assert_(result.dtype=='m8[ns]') result = df['A'] + datetime(2001,1,1) - self.assert_(result.dtype=='timedelta64[ns]') + expected = Series([timedelta(days=26663+i) for i in range(3)]) + assert_series_equal(result,expected) + self.assert_(result.dtype=='m8[ns]') - td = datetime(2001,1,1,3,4) - resulta = df['A'] - td - self.assert_(resulta.dtype=='timedelta64[ns]') + d = datetime(2001,1,1,3,4) + resulta = df['A'] - d 
+ self.assert_(resulta.dtype=='m8[ns]') - resultb = df['A'] + td - self.assert_(resultb.dtype=='timedelta64[ns]') + resultb = df['A'] + d + self.assert_(resultb.dtype=='m8[ns]') + + # roundtrip + resultb = resulta + d + assert_series_equal(df['A'],resultb) # timedelta on lhs - result = resultb + td - self.assert_(resultb.dtype=='timedelta64[ns]') + result = resultb + d + self.assert_(result.dtype=='m8[ns]') # timedeltas on rhs td = timedelta(days=1) resulta = df['A'] + td resultb = resulta - td assert_series_equal(resultb,df['A']) + self.assert_(resultb.dtype=='M8[ns]') + # roundtrip td = timedelta(minutes=5,seconds=3) resulta = df['A'] + td resultb = resulta - td + assert_series_equal(df['A'],resultb) self.assert_(resultb.dtype=='M8[ns]') + # td operate with td + td1 = Series([timedelta(minutes=5,seconds=3)]*3) + td2 = timedelta(minutes=5,seconds=4) + result = td1-td2 + expected = Series([timedelta(seconds=0)]*3)-Series([timedelta(seconds=1)]*3) + self.assert_(result.dtype=='m8[ns]') + assert_series_equal(result,expected) + + def test_timedelta64_functions(self): + + from datetime import timedelta + + # index min/max + td = Series(date_range('2012-1-1', periods=3, freq='D'))-Timestamp('20120101') + + result = td.idxmin() + self.assert_(result == 0) + + result = td.idxmax() + self.assert_(result == 2) + + # with NaT (broken) + td[0] = np.nan + + #result = td.idxmin() + #self.assert_(result == 1) + + #result = td.idxmax() + #self.assert_(result == 2) + + def test_sub_of_datetime_from_TimeSeries(self): + from pandas.core import common as com + from datetime import datetime + a = Timestamp(datetime(1993,01,07,13,30,00)) + b = datetime(1993, 6, 22, 13, 30) + a = Series([a]) + result = com._possibly_cast_to_timedelta(np.abs(a - b)) + self.assert_(result.dtype == 'timedelta64[ns]') + def test_timedelta64_nan(self): from pandas import tslib diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 6ac2ee3607f51..7a5bb0f569349 100644 --- a/pandas/tslib.pyx +++ 
b/pandas/tslib.pyx @@ -928,25 +928,50 @@ def repr_timedelta64(object value): ivalue = value.view('i8') + # put frac in seconds frac = float(ivalue)/1e9 - days = int(frac) / 86400 - frac -= days*86400 - hours = int(frac) / 3600 - frac -= hours * 3600 - minutes = int(frac) / 60 - seconds = frac - minutes * 60 - nseconds = int(seconds) - - if nseconds == seconds: - seconds_pretty = "%02d" % nseconds + sign = np.sign(frac) + frac = np.abs(frac) + + if frac >= 86400: + days = int(frac / 86400) + frac -= days * 86400 + else: + days = 0 + + if frac >= 3600: + hours = int(frac / 3600) + frac -= hours * 3600 + else: + hours = 0 + + if frac >= 60: + minutes = int(frac / 60) + frac -= minutes * 60 + else: + minutes = 0 + + if frac >= 1: + seconds = int(frac) + frac -= seconds + else: + seconds = 0 + + if frac == int(frac): + seconds_pretty = "%02d" % seconds + else: + sp = abs(round(1e6*frac)) + seconds_pretty = "%02d.%06d" % (seconds,sp) + + if sign < 0: + sign_pretty = "-" else: - sp = abs(int(1e6*(seconds-nseconds))) - seconds_pretty = "%02d.%06d" % (nseconds,sp) + sign_pretty = "" if days: - return "%d days, %02d:%02d:%s" % (days,hours,minutes,seconds_pretty) + return "%s%d days, %02d:%02d:%s" % (sign_pretty,days,hours,minutes,seconds_pretty) - return "%02d:%02d:%s" % (hours,minutes,seconds_pretty) + return "%s%02d:%02d:%s" % (sign_pretty,hours,minutes,seconds_pretty) def array_strptime(ndarray[object] values, object fmt): cdef: From ad7a399bb0fd48c26f1ea50474df7358d8086b9c Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 1 Mar 2013 12:26:23 -0500 Subject: [PATCH 2/2] BUG: issue with timedelta64 and numpy 1.6.1 (used on travis), fixed by using views --- pandas/core/nanops.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8ffcc92fd0b9b..881cef2311b27 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -77,8 +77,10 @@ def _isfinite(values): def _na_ok_dtype(dtype): return 
not issubclass(dtype.type, (np.integer, np.datetime64, np.timedelta64)) -def _needs_view_dtype(dtype): - return issubclass(dtype.type, (np.datetime64,np.timedelta64)) +def _view_if_needed(values): + if issubclass(values.dtype.type, (np.datetime64,np.timedelta64)): + return values.view(np.int64) + return values def _wrap_results(result,dtype): """ wrap our results if needed """ @@ -192,8 +194,7 @@ def _nanmin(values, axis=None, skipna=True): values = values.copy() np.putmask(values, mask, np.inf) - if _needs_view_dtype(dtype): - values = values.view(np.int64) + values = _view_if_needed(values) # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ @@ -225,8 +226,7 @@ def _nanmax(values, axis=None, skipna=True): values = values.copy() np.putmask(values, mask, -np.inf) - if _needs_view_dtype(dtype): - values = values.view(np.int64) + values = _view_if_needed(values) # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ @@ -255,6 +255,7 @@ def nanargmax(values, axis=None, skipna=True): Returns -1 in the NA case """ mask = _isfinite(values) + values = _view_if_needed(values) if not issubclass(values.dtype.type, np.integer): values = values.copy() np.putmask(values, mask, -np.inf) @@ -268,6 +269,7 @@ def nanargmin(values, axis=None, skipna=True): Returns -1 in the NA case """ mask = _isfinite(values) + values = _view_if_needed(values) if not issubclass(values.dtype.type, np.integer): values = values.copy() np.putmask(values, mask, np.inf)