diff --git a/doc/source/io.rst b/doc/source/io.rst index 75f36c5274cd2..17c7653072526 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -867,6 +867,12 @@ data columns: index_col=0) #index is the nominal column df +.. note:: + If a column or index contains an unparseable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use :func:`to_datetime` after ``pd.read_csv``. + + .. note:: read_csv has a fast_path for parsing datetime strings in iso8601 format, e.g "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3cd23150bb0bf..200943324ce66 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -167,6 +167,10 @@ * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' + If a column or index contains an unparseable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``. + Note: A fast-path exists for iso8601-formatted dates. 
infer_datetime_format : boolean, default False If True and parse_dates is enabled, pandas will attempt to infer the format diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 95fd2d52db009..3a0dd4eaa09e5 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -138,6 +138,19 @@ def date_parser(date, time): names=['datetime', 'prn'])) assert_frame_equal(df, df_correct) + def test_parse_date_column_with_empty_string(self): + # GH 6428: a blank date cell leaves 'opdate' as unparsed strings + data = """case,opdate + 7,10/18/2006 + 7,10/18/2008 + 621, """ + result = read_csv(StringIO(data), parse_dates=['opdate']) + expected_data = [[7, '10/18/2006'], + [7, '10/18/2008'], + [621, ' ']] + expected = DataFrame(expected_data, columns=['case', 'opdate']) + assert_frame_equal(result, expected) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2c3e5ca126209..beacc21912edc 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -947,6 +947,18 @@ def test_to_datetime_on_datetime64_series(self): result = to_datetime(s) self.assertEqual(result[0], s[0]) + def test_to_datetime_with_space_in_series(self): + # GH 6428: ' ' raises, coerces to NaT, or is kept, per `errors` + s = Series(['10/18/2006', '10/18/2008', ' ']) + tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + result_coerce = to_datetime(s, errors='coerce') + expected_coerce = Series([datetime(2006, 10, 18), + datetime(2008, 10, 18), + pd.NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors='ignore') + tm.assert_series_equal(result_ignore, s) + def test_to_datetime_with_apply(self): # this is only locale tested with US/None locales _skip_if_has_locale()