From 030f5ec8755fa39c11a07d86a2f211fe113ba789 Mon Sep 17 00:00:00 2001 From: Stephen Rauch Date: Sun, 12 Feb 2017 08:15:56 -0800 Subject: [PATCH] BUG: Parse two date columns broken in read_csv with multiple headers Fix for GH15376 In `io/parsers/_try_convert_dates()` when selecting columns based on a column index from a set of columns with multi-level names, the column `name` was converted to a string. This appears to be a bug since the `name` was a tuple before the conversion. This causes problems downstream when threre is an attempt to use this name to lookup a column, and that lookup fails becuase the desired column is keyed from the tuple, not its string representation. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/io/parsers.py | 2 +- pandas/tests/io/test_date_converters.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9f86c777c665d..eaafb5cd4a552 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -574,6 +574,7 @@ Bug Fixes +- Bug in ``.read_csv()`` which caused ``parse_dates={'datetime': [0, 1]}`` to fail with multiline headers (:issue:`15376`) - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index f8905dfa315c4..6dad5d5aca316 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2856,7 +2856,7 @@ def _try_convert_dates(parser, colspec, data_dict, columns): if c in colset: colnames.append(c) elif isinstance(c, int) and c not in columns: - colnames.append(str(columns[c])) + colnames.append(columns[c]) else: colnames.append(c) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py index 5b54925c65fbd..0208a2db145e8 100644 --- a/pandas/tests/io/test_date_converters.py +++ b/pandas/tests/io/test_date_converters.py @@ -148,3 +148,21 @@ def test_parse_date_column_with_empty_string(self): [621, ' ']] expected = DataFrame(expected_data, columns=['case', 'opdate']) assert_frame_equal(result, expected) + + def test_parse_date_time_multi_level_column_name(self): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11. +""" + datecols = {'date_time': [0, 1]} + result = read_csv(StringIO(data), sep=',', header=[0, 1], + parse_dates=datecols, + date_parser=conv.parse_date_time) + + expected_data = [[datetime(2001, 1, 5, 9, 0, 0), 0., 10.], + [datetime(2001, 1, 6, 0, 0, 0), 1., 11.]] + expected = DataFrame(expected_data, + columns=['date_time', ('A', 'a'), ('B', 'b')]) + assert_frame_equal(result, expected)