From f7a8a2e7fdebe48c35c3a14f4e991efc0ea07890 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 30 Jun 2014 09:31:11 -0400 Subject: [PATCH] BUG: Bug in to_timedelta that accepted invalid units and misinterpreted m/h (GH7611, GH6423) --- doc/source/v0.14.1.txt | 2 +- pandas/tseries/tests/test_timedeltas.py | 32 ++++++++++++++++---- pandas/tseries/timedeltas.py | 40 ++++++++++++++++++++----- pandas/tslib.pyx | 24 +++++++++++---- 4 files changed, 79 insertions(+), 19 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index be818e71f55a8..58154e066c0bf 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -176,7 +176,7 @@ Bug Fixes - Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`) - Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`) - +- Bug in ``to_timedelta`` that accepted invalid units and misinterpreted 'm/h' (:issue:`7611`, :issue: `6423`) - Bug in ``Panel.apply`` with a multi-index as an axis (:issue:`7469`) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index f7acb182b8cde..8e841632d88d3 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -199,20 +199,40 @@ def conv(v): expected = Series([ np.timedelta64(1,'D') ]*5) tm.assert_series_equal(result, expected) + def testit(unit, transform): + + # array + result = to_timedelta(np.arange(5),unit=unit) + expected = Series([ np.timedelta64(i,transform(unit)) for i in np.arange(5).tolist() ]) + tm.assert_series_equal(result, expected) + + # scalar + result = to_timedelta(2,unit=unit) + expected = np.timedelta64(2,transform(unit)).astype('timedelta64[ns]') + self.assert_numpy_array_equal(result,expected) + # validate all units # GH 6855 for unit in ['Y','M','W','D','y','w','d']: - result = to_timedelta(np.arange(5),unit=unit) - expected = Series([ np.timedelta64(i,unit.upper()) for i in np.arange(5).tolist() ]) - tm.assert_series_equal(result, expected) + testit(unit,lambda x: x.upper()) + for unit in ['days','day','Day','Days']: + testit(unit,lambda x: 'D') for unit in ['h','m','s','ms','us','ns','H','S','MS','US','NS']: - result = to_timedelta(np.arange(5),unit=unit) - expected = Series([ np.timedelta64(i,unit.lower()) for i in np.arange(5).tolist() ]) - tm.assert_series_equal(result, expected) + testit(unit,lambda x: x.lower()) + + # offsets + + # m + testit('T',lambda x: 'm') + + # ms + testit('L',lambda x: 'ms') # these will error self.assertRaises(ValueError, lambda : to_timedelta(['1h'])) self.assertRaises(ValueError, lambda : to_timedelta(['1m'])) + self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo')) + self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo')) def test_to_timedelta_via_apply(self): _skip_if_numpy_not_friendly() diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 1dc8b5cfea132..0d6d74db6f18c 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -32,6 +32,8 @@ def to_timedelta(arg, box=True, unit='ns'): if _np_version_under1p7: raise ValueError("to_timedelta is not support for numpy < 1.7") + unit = _validate_timedelta_unit(unit) + def _convert_listlike(arg, box, unit): if isinstance(arg, (list,tuple)): @@ -40,7 +42,6 @@ def _convert_listlike(arg, box, unit): if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): - unit = _validate_timedelta_unit(unit) # these are shortcutable value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]') @@ -67,14 +68,39 @@ def _convert_listlike(arg, box, unit): # ...so it must be a scalar value. Return scalar. return _coerce_scalar_to_timedelta_type(arg, unit=unit) +_unit_map = { + 'Y' : 'Y', + 'y' : 'Y', + 'W' : 'W', + 'w' : 'W', + 'D' : 'D', + 'd' : 'D', + 'days' : 'D', + 'Days' : 'D', + 'day' : 'D', + 'Day' : 'D', + 'M' : 'M', + 'H' : 'h', + 'h' : 'h', + 'm' : 'm', + 'T' : 'm', + 'S' : 's', + 's' : 's', + 'L' : 'ms', + 'MS' : 'ms', + 'ms' : 'ms', + 'US' : 'us', + 'us' : 'us', + 'NS' : 'ns', + 'ns' : 'ns', + } + def _validate_timedelta_unit(arg): """ provide validation / translation for timedelta short units """ - - if re.search("Y|W|D",arg,re.IGNORECASE) or arg == 'M': - return arg.upper() - elif re.search("h|m|s|ms|us|ns",arg,re.IGNORECASE): - return arg.lower() - raise ValueError("invalid timedelta unit {0} provided".format(arg)) + try: + return _unit_map[arg] + except: + raise ValueError("invalid timedelta unit {0} provided".format(arg)) _short_search = re.compile( "^\s*(?P-?)\s*(?P\d*\.?\d*)\s*(?Pd|s|ms|us|ns)?\s*$",re.IGNORECASE) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index c957884b3cebb..d8aed3a42ca72 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1387,11 +1387,17 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce): else: if util.is_array(ts): ts = ts.astype('int64').item() - ts = cast_from_unit(ts, unit) - if _np_version_under1p7: - ts = timedelta(microseconds=ts/1000.0) + if unit in ['Y','M','W']: + if _np_version_under1p7: + raise ValueError("unsupported unit for native timedelta under this numpy {0}".format(unit)) + else: + ts = np.timedelta64(ts,unit) else: - ts = np.timedelta64(ts) + ts = cast_from_unit(ts, unit) + if _np_version_under1p7: + ts = timedelta(microseconds=ts/1000.0) + else: + ts = np.timedelta64(ts) elif util.is_string_object(ts): if ts in _nat_strings or coerce: return np.timedelta64(iNaT) @@ -1747,6 +1753,12 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1: if unit == 'D' or unit == 'd': m = 1000000000L * 86400 p = 6 + elif unit == 'h': + m = 1000000000L * 3600 + p = 6 + elif unit == 'm': + m = 1000000000L * 60 + p = 6 elif unit == 's': m = 1000000000L p = 6 @@ -1756,9 +1768,11 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1: elif unit == 'us': m = 1000L p = 0 - else: + elif unit == 'ns' or unit is None: m = 1L p = 0 + else: + raise ValueError("cannot cast unit {0}".format(unit)) # just give me the unit back if ts is None: