From 917c98f483be4942ec3e67b2601f4c9049ad4370 Mon Sep 17 00:00:00 2001
From: jreback
Date: Sat, 12 Oct 2013 18:09:12 -0400
Subject: [PATCH] BUG: Bug in to_datetime with a format and coerce=True not raising (GH5195)

---
Review notes (free text after the "---" separator; not part of the commit
message):

* pandas/tseries/tools.py: the strptime fallback in _convert_listlike now
  forwards `coerce` to tslib.array_strptime.
* pandas/tslib.pyx: array_strptime gains a `coerce=False` parameter. When
  coerce is true, an element that does not match the format regex, or that
  matches but leaves trailing characters (len(values[i]) != found.end()),
  is stored as iNaT and the loop continues instead of raising ValueError.
  With the default, both ValueErrors are raised as before.
* pandas/tseries/tests/test_timeseries.py: test_to_datetime_with_apply
  checks that Series.apply(pd.to_datetime, format='%b %y') equals
  pd.to_datetime(series, format='%b %y'), that an empty-string element
  raises ValueError without coerce, and that element-wise coerce=True
  equals whole-series coerce=True.
* NOTE(review): the subject and release note say "not raising", but the
  hunks make coerce=True stop raising for unparseable items; the wording
  looks inverted -- confirm against GH5195.
* NOTE(review): the test comment says "US/None locales", but the guard
  skips whenever locale.getlocale() returns a non-None language --
  presumably including en_US; confirm intent.
* NOTE(review): line breaks and indentation in this copy were reconstructed
  from syntax and hunk line counts; verify against the upstream commit
  before applying.

 doc/source/release.rst                  |  1 +
 pandas/tseries/tests/test_timeseries.py | 23 +++++++++++++++++++++++
 pandas/tseries/tools.py                 |  2 +-
 pandas/tslib.pyx                        |  8 +++++++-
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 55f786d263a0a..f899849475df8 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -593,6 +593,7 @@ Bug Fixes
   - Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`)
   - Bug in comparing duplicate frames (:issue:`4421`) related
   - Bug in describe on duplicate frames
+  - Bug in ``to_datetime`` with a format and ``coerce=True`` not raising (:issue:`5195`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 473ea21da1585..7f11fa5873fe7 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -879,6 +879,29 @@ def test_to_datetime_on_datetime64_series(self):
         result = to_datetime(s)
         self.assertEquals(result[0], s[0])
 
+    def test_to_datetime_with_apply(self):
+
+        # this is only locale tested with US/None locales
+        import locale
+        (lang,encoding) = locale.getlocale()
+        if lang is not None:
+            raise nose.SkipTest("format codes cannot work with a locale of {0}".format(lang))
+
+        # GH 5195
+        # with a format and coerce a single item to_datetime fails
+        td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1,2,3])
+        expected = pd.to_datetime(td, format='%b %y')
+        result = td.apply(pd.to_datetime, format='%b %y')
+        assert_series_equal(result, expected)
+
+        td = pd.Series(['May 04', 'Jun 02', ''], index=[1,2,3])
+        self.assertRaises(ValueError, lambda : pd.to_datetime(td,format='%b %y'))
+        self.assertRaises(ValueError, lambda : td.apply(pd.to_datetime, format='%b %y'))
+        expected = pd.to_datetime(td, format='%b %y', coerce=True)
+
+        result = td.apply(lambda x: pd.to_datetime(x, format='%b %y', coerce=True))
+        assert_series_equal(result, expected)
+
     def test_nat_vector_field_access(self):
         idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000'])
 
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index 793d9409e662e..3d8803237931d 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -112,7 +112,7 @@ def _convert_listlike(arg, box):
 
                 # fallback
                 if result is None:
-                    result = tslib.array_strptime(arg, format)
+                    result = tslib.array_strptime(arg, format, coerce=coerce)
             else:
                 result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
                                                  utc=utc, dayfirst=dayfirst,
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index c6c2b418f553d..372de1e7c1b21 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -1174,7 +1174,7 @@ def repr_timedelta64(object value):
     return "%s%02d:%02d:%s" % (sign_pretty, hours, minutes, seconds_pretty)
 
 
-def array_strptime(ndarray[object] values, object fmt):
+def array_strptime(ndarray[object] values, object fmt, coerce=False):
     cdef:
         Py_ssize_t i, n = len(values)
         pandas_datetimestruct dts
@@ -1237,9 +1237,15 @@ def array_strptime(ndarray[object] values, object fmt):
     for i in range(n):
         found = format_regex.match(values[i])
         if not found:
+            if coerce:
+                iresult[i] = iNaT
+                continue
             raise ValueError("time data %r does not match format %r" %
                              (values[i], fmt))
         if len(values[i]) != found.end():
+            if coerce:
+                iresult[i] = iNaT
+                continue
             raise ValueError("unconverted data remains: %s" %
                               values[i][found.end():])
         year = 1900