From 36bc9b75a4975b67f879d73536b5cb9a27811b75 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 11 Dec 2016 21:01:48 -0800 Subject: [PATCH 1/2] TST: Parse dates with empty space (#6428) Add doc explaining parse_date limitation --- doc/source/io.rst | 6 ++++++ pandas/io/parsers.py | 4 ++++ pandas/io/tests/test_date_converters.py | 13 +++++++++++++ pandas/tseries/tests/test_timeseries.py | 12 ++++++++++++ 4 files changed, 35 insertions(+) diff --git a/doc/source/io.rst b/doc/source/io.rst index 75f36c5274cd2..a731fbacdc38f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -867,6 +867,12 @@ data columns: index_col=0) #index is the nominal column df +.. note:: + If a column or index contains an unparseable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv`` + + .. note:: read_csv has a fast_path for parsing datetime strings in iso8601 format, e.g "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3cd23150bb0bf..200943324ce66 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -167,6 +167,10 @@ * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' + If a column or index contains an unparseable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv`` + Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False If True and parse_dates is enabled, pandas will attempt to infer the format diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 95fd2d52db009..3a0dd4eaa09e5 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -138,6 +138,19 @@ def date_parser(date, time): names=['datetime', 'prn'])) assert_frame_equal(df, df_correct) + def test_parse_date_column_with_empty_string(self): + # GH 6428 + data = """case,opdate + 7,10/18/2006 + 7,10/18/2008 + 621, """ + result = read_csv(StringIO(data), parse_dates=['opdate']) + expected_data = [[7, '10/18/2006'], + [7, '10/18/2008'], + [621, ' ']] + expected = DataFrame(expected_data, columns=['case', 'opdate']) + assert_frame_equal(result, expected) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2c3e5ca126209..beacc21912edc 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -947,6 +947,18 @@ def test_to_datetime_on_datetime64_series(self): result = to_datetime(s) self.assertEqual(result[0], s[0]) + def test_to_datetime_with_space_in_series(self): + # GH 6428 + s = Series(['10/18/2006', '10/18/2008', ' ']) + tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise')) + result_coerce = to_datetime(s, errors='coerce') + expected_coerce = Series([datetime(2006, 10, 18), + datetime(2008, 10, 18), + pd.NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors='ignore') + tm.assert_series_equal(result_ignore, s) + def test_to_datetime_with_apply(self): # this is only locale tested with US/None locales _skip_if_has_locale() From 23a4cb1611240b0aeb688d053159f4a094306516 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Dec 2016 12:06:37 +0100 Subject: [PATCH 2/2] Add link to to_datetime --- doc/source/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index a731fbacdc38f..17c7653072526 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -870,7 +870,7 @@ data columns: .. note:: If a column or index contains an unparseable date, the entire column or index will be returned unaltered as an object data type. For non-standard - datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv`` + datetime parsing, use :func:`to_datetime` after ``pd.read_csv``. .. note::