diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index fee98720990e0..5f9ebef4fd546 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -74,3 +74,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- If an Excel cell of type 'date' contains a numeric value too large to convert to a datetime, return it as a float instead of raising an error (:issue:`10001`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 642f322f17fa1..4c26480a0f583 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -329,11 +329,15 @@ def _parse_cell(cell_contents, cell_typ): appropriate object""" if cell_typ == XL_CELL_DATE: + if xlrd_0_9_3: # Use the newer xlrd datetime handling. - cell_contents = xldate.xldate_as_datetime(cell_contents, - epoch1904) - + try: + cell_contents = \ + xldate.xldate_as_datetime(cell_contents, + epoch1904) + except OverflowError: + return cell_contents # Excel doesn't distinguish between dates and time, # so we treat dates on the epoch as times only. # Also, Excel supports 1900 and 1904 epochs. @@ -346,7 +350,11 @@ def _parse_cell(cell_contents, cell_typ): cell_contents.microsecond) else: # Use the xlrd <= 0.9.2 date handling. 
- dt = xldate.xldate_as_tuple(cell_contents, epoch1904) + try: + dt = xldate.xldate_as_tuple(cell_contents, epoch1904) + + except xldate.XLDateTooLarge: + return cell_contents if dt[0] < MINYEAR: cell_contents = time(*dt[3:]) diff --git a/pandas/io/tests/data/testdateoverflow.xls b/pandas/io/tests/data/testdateoverflow.xls new file mode 100644 index 0000000000000..d63942f1b5f64 Binary files /dev/null and b/pandas/io/tests/data/testdateoverflow.xls differ diff --git a/pandas/io/tests/data/testdateoverflow.xlsm b/pandas/io/tests/data/testdateoverflow.xlsm new file mode 100644 index 0000000000000..0207549692e6a Binary files /dev/null and b/pandas/io/tests/data/testdateoverflow.xlsm differ diff --git a/pandas/io/tests/data/testdateoverflow.xlsx b/pandas/io/tests/data/testdateoverflow.xlsx new file mode 100644 index 0000000000000..15f0e8825339e Binary files /dev/null and b/pandas/io/tests/data/testdateoverflow.xlsx differ diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 5dd764b471d3f..16905e3559cdb 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -481,6 +481,16 @@ def test_set_column_names_in_parameter(self): tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) + def test_date_conversion_overflow(self): + # GH 10001 : pandas.ExcelFile ignore parse_dates=False + refdf = pd.DataFrame([[pd.Timestamp('2016-03-12'), 'Marc Johnson'], + [pd.Timestamp('2016-03-16'), 'Jack Black'], + [1e+20, 'Timothy Brown']], + columns=['DateColWithBigInt', 'StringCol']) + + act_df = self.get_exceldf('testdateoverflow') + tm.assert_frame_equal(refdf, act_df) + class XlrdTests(ReadingTestsBase): """