From 8f417acea87a84a40a6a3fef127794b6d44a4357 Mon Sep 17 00:00:00 2001 From: timmie Date: Thu, 22 Aug 2013 03:39:47 +0200 Subject: [PATCH 1/4] now sectionwise: date_converter: excel / date_parser #4332 --- pandas/io/excel.py | 31 +++++++- pandas/io/tests/test_excel.py | 145 +++++++++++++++++++++++++++++++++- 2 files changed, 170 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 534a88e303dbf..588450cb4c11f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -127,8 +127,9 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: skip_footer = skipfooter - - return self._parse_excel(sheetname, header=header, skiprows=skiprows, + + # this now gives back a df + res = self._parse_excel(sheetname, header=header, skiprows=skiprows, index_col=index_col, has_index_names=has_index_names, parse_cols=parse_cols, @@ -136,6 +137,8 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, date_parser=date_parser, na_values=na_values, thousands=thousands, chunksize=chunksize, skip_footer=skip_footer, **kwds) + + return res def _should_parse(self, i, parse_cols): @@ -195,11 +198,24 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, if parse_cols is None or should_parse[j]: if typ == XL_CELL_DATE: dt = xldate_as_tuple(value, datemode) + # how to produce this first case? + # if the year is ZERO then values are time/hours if dt[0] < datetime.MINYEAR: # pragma: no cover - value = datetime.time(*dt[3:]) + datemode = 1 + dt = xldate_as_tuple(value, datemode) + + value = datetime.time(*dt[3:]) + + + #or insert a full date else: value = datetime.datetime(*dt) + + #apply eventual date_parser correction + if date_parser: + value = date_parser(value) + elif typ == XL_CELL_ERROR: value = np.nan elif typ == XL_CELL_BOOLEAN: @@ -221,8 +237,15 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, skip_footer=skip_footer, chunksize=chunksize, **kwds) + res = parser.read() + + if header is not None: + + if len(data[header]) == len(res.columns.tolist()): + res.columns = data[header] + - return parser.read() + return res @property def sheet_names(self): diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 3f41be6ae64c6..2074950aadbc5 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,7 +1,7 @@ # pylint: disable=E1101 from pandas.compat import StringIO, BytesIO, PY3, u, range, map -from datetime import datetime +#from datetime import datetime from os.path import split as psplit import csv import os @@ -14,7 +14,7 @@ from numpy import nan import numpy as np -from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex +from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex, datetime import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, TextParser, TextFileReader) @@ -66,6 +66,78 @@ def _skip_if_no_excelsuite(): _skip_if_no_openpyxl() +def _skip_if_no_mpl(): + '''pandas.tseries.converter imports matplotlib''' + try: + import matplotlib + except ImportError: + raise nose.SkipTest('matplotlib not installed, skipping') + + +def _offset_time(value, offset=-10): + '''appply corrective time offset in minutes + + input + ----- + value : datetime.time + offset : integer value in minutes + ''' + # if a excel time like '23.07.2013 24:00' they actually mean + # in Python '23.07.2013 23:59', must be converted +# offset = -10 # minutes + _skip_if_no_mpl() + from pandas.io.date_converters import offset_datetime + ti_corr = offset_datetime(value, minutes=offset) + # combine the corrected time component with the datetime +# dt_comb = dt.datetime.combine(dt_now, ti_corr) + + #since input is time, we return it. + #TODO: + #it is actually very strange that Pandas does consider an index + #of datetime.time as index of objects and not time + + return ti_corr + + +def _correct_date_time(value): + '''corrects the times in the Excel test file to Python time + ''' + _skip_if_no_xlrd() + _skip_if_no_mpl() + from pandas.io.date_converters import dt2ti + + # if a excel time like '24:00' it converted to 23.07.2013 00:00' + # here, we just want the time component, + # since all inputs shall be equal + value = dt2ti(value) + + #apply offset + value = _offset_time(value) + + return value + + +def read_excel_cell(filename): + '''read the excel cells into a dt object''' + _skip_if_no_xlrd() + # NameError: global name 'xlrd' is not defined + from xlrd import open_workbook, xldate_as_tuple + import datetime as dt + wb = open_workbook(filename) + sh = wb.sheet_by_name('min') + #get first time stamp + #TODO: the start row is: 12 + ti_start = xldate_as_tuple(sh.row(12)[1].value, 1) + #get first last stamp + ti_end = xldate_as_tuple(sh.row(155)[1].value, 1) + + #as timestamp + ti_start = dt.time(*ti_start[3:]) + ti_end = dt.time(*ti_end[3:]) + + return (ti_start, ti_end) + + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -295,6 +367,75 @@ def test_xlsx_table(self): tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5) + def test_xlsx_table_hours(self): + #check if the hours are read incorrectly + _skip_if_no_xlrd() + _skip_if_no_openpyxl() + _skip_if_no_mpl() + import datetime as dt + + + + # 1900 datemode file + filename = 'example_file_2013-07-25.xlsx' + pth = os.path.join(self.dirpath, filename) + xlsx = ExcelFile(pth) + # parse_dates=False is necessary to obtain right sorting of rows in df + # TODO: this must actually be skiprows=11, header=10 +# df =xlsx.parse('min', skiprows=12, header=10, index_col=1, +# parse_dates=False, date_parser=correct_date_time) + df =xlsx.parse('min', skiprows=12, header=10, index_col=1, + parse_dates=False, date_parser=_correct_date_time) + + df_start = df.index[0] + df_end = df.index[-1:] + # test: are the first/last index equal to the cell read in diretly by xlrd + excel_cells = read_excel_cell(pth) + + xl_start = _offset_time(excel_cells[0]) + xl_end = _offset_time(excel_cells[1]) + + self.assertEqual(df_start, xl_start) + self.assertEqual(df_end, xl_end) + + #test Excel 1904 datemode + filename_1904 = 'example_file_2013-07-25_1904-dates.xlsx' + pth = os.path.join(self.dirpath, filename_1904) + xlsx = ExcelFile(pth) + # parse_dates=False is necessary to obtain right sorting of roes in df + # TODO: this must actually be skiprows=11 + df =xlsx.parse('min', skiprows=12, header=10, index_col=1, + parse_dates=False, date_parser=_correct_date_time) + + df_start = df.index[0] + df_end = df.index[-1:] + + excel_cells = read_excel_cell(pth) + xl_start = _offset_time(excel_cells[0]) + xl_end = _offset_time(excel_cells[1]) + + # test: are the first/last index equal to the cell read in diretly + self.assertEqual(df_start, xl_start) + self.assertEqual(df_end, xl_end) + + # test if a produced datetime is equal to a datetime directly produced by xlrd + daydt_str = filename.split('.')[0][-10:] + daydt = dt.datetime.strptime(daydt_str, '%Y-%m-%d') +# + df['date'] = daydt + df['time'] = df.index + + #TODO review this +# df['datetime'] = df.apply(lambda x: pd.datetime.combine(x['date'], x['time'], axis=1)) + +# df.set_index(['datetime']) +# import datetime as dt +# dt_test = dt.datetime.combine(daydt, excel_cells[1]) + +# pdt_test = df.index[-1] + +# self.assertEqual(dt_test, pdt_test) + def test_specify_kind_xls(self): _skip_if_no_xlrd() xlsx_file = os.path.join(self.dirpath, 'test.xlsx') From 82fdb7d049c33cc7d3db762e75645ce1a19a2377 Mon Sep 17 00:00:00 2001 From: timmie Date: Thu, 22 Aug 2013 03:56:33 +0200 Subject: [PATCH 2/4] now sectionwise: parser / skip rowsin between #4340 --- pandas/io/parsers.py | 32 ++++++++++++++++++++++++++++++-- pandas/io/tests/test_parsers.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3b132be800cb1..9d0581f9094bc 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1150,7 +1150,11 @@ def TextParser(*args, **kwds): returns Series if only one column """ kwds['engine'] = 'python' - return TextFileReader(*args, **kwds) + + res = TextFileReader(*args, **kwds) + + + return res # delimiter=None, dialect=None, names=None, header=0, # index_col=None, @@ -1385,6 +1389,7 @@ def _convert_data(self, data): clean_conv) def _infer_columns(self): + #TODO: this full part is too complex and somewhat strage!!! names = self.names if self.header is not None: @@ -1396,13 +1401,20 @@ def _infer_columns(self): header = list(header) + [header[-1]+1] else: have_mi_columns = False + #TODO: explain why header (in this case 1 number) needs to be a list??? header = [ header ] columns = [] for level, hr in enumerate(header): - + #TODO: explain why self.buf is needed. + # the header is correctly retrieved in excel.py by + # data[header] = _trim_excel_header(data[header]) if len(self.buf) > 0: line = self.buf[0] + + elif (header[0] == hr) and (level == 0) and (header[0] > 0): + line = self._get_header() + else: line = self._next_line() @@ -1456,8 +1468,24 @@ def _infer_columns(self): columns = [ names ] return columns + + def _get_header(self): + ''' reads header if e.g. header + FIXME: this tshoul be turned into something much less complicates + FIXME: all due to the header assuming that there is never a row between + data and header + ''' + if isinstance(self.data, list): + line = self.data[self.header] + self.pos = self.header +1 + else: + line = self._next_line() + + return line def _next_line(self): + #FIXME: why is self.data at times a list and sometimes a _scv.reader?? + # reduce complexity here!!! if isinstance(self.data, list): while self.pos in self.skiprows: self.pos += 1 diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 787682f340250..f225200c53a87 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -1,4 +1,6 @@ # pylint: disable=E1101 +from __future__ import absolute_import + from datetime import datetime import csv @@ -38,6 +40,13 @@ from pandas.parser import OverflowError +def _skip_if_no_mpl(): + '''pandas.tseries.converter imports matplotlib''' + try: + import matplotlib + except ImportError: + raise nose.SkipTest('matplotlib not installed, skipping') + class ParserTests(object): """ @@ -2015,6 +2024,30 @@ def test_iteration_open_handle(self): expected = Series(['DDD', 'EEE', 'FFF', 'GGG']) tm.assert_series_equal(result, expected) + def test_infer_columns(self): + '''reads xls with certain order of header, skiprows / data''' + _skip_if_no_mpl() + from pandas.io.excel import ExcelFile + from . import test_excel + correct_date_time = test_excel._correct_date_time + test_excel._skip_if_no_excelsuite() + + # test of the header column is read in nicely + # list with the expected column names from the excel file + headercols_target = ['blank', 'temperature', 'precipitation', 'Area'] + + # add the block reading the excel file into a DataFrame + filename = 'example_file_2013-07-25.xlsx' + pth = os.path.join(self.dirpath, filename) + xlsx = ExcelFile(pth) + df = xlsx.parse('min', skiprows=12, header=10, index_col=1, + parse_dates=False, date_parser=correct_date_time) + #read in the excel file + headercols_df_in = df.columns.tolist() + + self.assertEqual(headercols_df_in, headercols_target) + + class TestCParserHighMemory(ParserTests, unittest.TestCase): def read_csv(self, *args, **kwds): From 7bb4817b68bfc05726ff0075d1e7fae40425636c Mon Sep 17 00:00:00 2001 From: TimMi Date: Thu, 22 Aug 2013 12:07:20 +0200 Subject: [PATCH 3/4] added the missing test data files for this very branch --- .../io/tests/data/example_file_2013-07-25.xlsx | Bin 0 -> 15566 bytes .../example_file_2013-07-25_1904-dates.xlsx | Bin 0 -> 13246 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/io/tests/data/example_file_2013-07-25.xlsx create mode 100644 pandas/io/tests/data/example_file_2013-07-25_1904-dates.xlsx diff --git a/pandas/io/tests/data/example_file_2013-07-25.xlsx b/pandas/io/tests/data/example_file_2013-07-25.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0d7b4f9e8f2270d0169146ac84e48abc7fd5a839 GIT binary patch literal 15566 zcmaKT1z42b*7nT6(48`hv~&teC=A`*IFy1SDcva@64FSjGy@XS3?b5j(%s!1L;Qp1 zJFoBe{@*#GOK6|9Y~Y&_Dk-)HhrAJL*?x4`4-Ed;4@~lyw73ZW69KM zl9$KfA7sO2>1(Hlo*Sd`80CUmT?n$#|oUU{;ss$&jZqnhfK1eryUIYUI7g zrS`=E&hjO;@$Ser&bM~ac47fxv zx#1Oa0iaB&n0?p;$VQ^<0}=P*tRU4Ftx}HDLYxxua5|k%?w^1Z^*UNeK)y7Q8Q{4H z@lT+B5B6(>p$N!QJ`0Thz^zq7Mn%7*b*6PV?4x@k!CWN*azCB5_Rvz{*gNxNJUIgS z@L>P)NZ5y*{ow}%5ZfVIsv;&mG+hdCW4N`}@QN^c7$>0(pQo|IT3x-n<22=c#5t*2 zhwaD)4Sf$!V|#^r`^pDHqG&&dwSg8L{NW^i=2ZtRUtBKV-<7q`S7bzo(w5`DRu%-M zubVaZzjmFgorBeLJ3FgejeqaJ#<+gVHXc0G4#fG0TwuKWB2ut1UcB_#Dd4;M!oasj zaVL3pV$o5YK^iVBB2--`r%0&f$nN1Mqgc1d7N~Z77EjGslqt_%gJ(_MTjzSq`}XB= znPoJjMVHKFK<>Dms+X(wUt!+@>1K~&U$WPJ5@+5#ae*2pEtljnp-nL4PPkxLOloJj zgarDUb~MpY+va$t@OFk_^*JQRJpB2~@A+I27a!))`q@R?zFq9e7V+}vTwcPmX!OR1 z-ea+zp;a?mF5x?N*!9fW4dy-P9FwzrMKUzoMo8do1-@A>Tc1G1U;bYeBKwy@juxg4 z=4P5s4zF!rIo=9nKA@<8&P#+b*O-S@<@)O#Et@7EOp{G>_m+%7M*gyiwI;aoJysa3jzmbRY5#bwSr>Px`9-lh2nbeH+ou>AGI z-i>(=+?Ok7qJ!6Xt|kV$6oelwYpZoGVoACmP%b1kET^hH(*p`@sAQeV9S`SLX9$Gz}Vytjx-?$AfipW*bb- z1W*uR%g0mqt32N+Sf=;*)^2tE~DW9p5>gvpy4>#MZyEly@V)bIhwxdOx z)7vY3=XPnxL$SuI>QQ4?r15U)qqMg1JwWxT-H%d&ka{~Vt{pKW8(W>zQWw{>$$93a z`m4STo=eA@^@VF&gno&tYh7*AK#m2Mt->W~o3 zaXYwfi0Rp>?qogku)0(gF`suezI<{~HhyDXeLvW{wytRwPptl#_k`Ejy=xcOi}U%_ zY1KA-Rk0>FNYz;+k}d%m4m+X>VrOHIm^2g%_ZNjX{Dzt$cpIt&FO2z zh~1C0jZ5psT9|i}=;f+Uy^m|dRo}Vkjk}}I*IA!Bh|fA`_tSdBg!h$C^1|xo z@YT}y#YW$;8QY!A9q)$yj{cC%`LBeC>C5u^jLRj2t8vKe@v12)q37WBobO}g^dWx8 zFgMMk(X{B0zKiqR>1liD?xWGswkK8886e|eszxEiQ`@u6!-~?$$>W_(Rphj3&Fs4S zfnfLI`?;j)ozX=MK^5UMN&TP&@AMx7ZS{qRmM(l;+%z%280r2UYqd{%6`ZdbLx zWWH;!rXx7&Y_-px8UHP-al)$Vse5;$_tULa+4`Q>_hV6b zJ7z=M*XOIJ8}PF1yu4O4%F=i0YunMgSufA2?mU~POVJ$x+;$P8l*aGSyp+67<7?a+ z=F@BEpWit~xUF40aq_lb6Q-%lC1~`z?{U5`&~rY}GnB2{FIj4I&qIL5BSHA+!hP*H zLb7yKvQ#r<<_Te=yZu_9*P8Oiz~pLnH8(+HI$@(Swa4j`qn-9_*TZbr0?FOXkQs#F zk&o!nwK4Az1H&F`!JU5j_ScLTPbzn(ICr-=vF{r6Qfb|n<+HU+$&~Px%8PX+V$F*^ zB`U~dpz^eSLLO1~^bult`jB&RFYO%9`;%h&{2>)m#9isv?0p~-!Au}+o#WU zqb(iBk$HQ?@q|h~-BI92ZH|!QcuJ{{=yJDeZL%sAUx6mPgLRbOoU`tiS6?0sx!&p3 zEvlKTQcIOLH5WhGHR{#f7r=^CN)TRl`QV_a$yKD&hr&uS9iC>E_b=VguD9=|R<4E2 zY^0W+$BT$9TZ)=zzShW_iq_n*?>1yuu@YULQLW8X9e*{^%ewhc$U6_LT$|z3mwV}! zxyJ}j86vId7BtUheSA2&Qr`h{>kLrJ6jr9rD-i4>61%@k+T=sWAieQHc16Ps{*@K& zS`7+6H-+%Ub;E@DlIR_k_u7D=Ak|tSWaKA_q;~_Ku?(-`#aA3L)*NIxGh?+je#O?2 z6%=I>g--TE4TUo+`sLVM)y#+XR4?CYuWt*%`_d~xB1!q1aPT)K{|M9s6fs*}8B zG`=XxZfUe~#lo?O%z)0~D4^)xmm}J~()ezv}+CnbUHxTO?4JVvpIujW!6sM6Qp6T_+*TLg(MqF-{*6)TL%y8mkZp*fqmHiNmg zl)k~^mf>g|6H6b{`+VkXZ-yD(45Pz@&o9kYXat&vwx*8%g0WyW#IUP;VD8Eh5H)qp z262rgI{eL*Y~$Q}=}xVJX6@)^ZH70_+Ha0OV$3;co}{q-+X&mMcl{$lsqs^l@mGaf zbA_^Vg~T-%Ts0T!+-G{keA@q>ci9Py+9Uv&< zVcTy@OPM80tlI8EEVvIpF5F%`S3w&7QVJd%!*1j|0dVLu?ujWtiA5D#M7PAY$yRgm zlS5CGFh%$;y7pVT-<~S}<5^^#(@uj=J9??iXSSiWbUOo4?NkVNAE-A#DLq`jFH|4@ zxeg`gwFL#o3<^6grv{~s!=i}9YzhAA$+Cxj`e@|}pSF`$QcAIjDPDrg5kJ=YO= zuA`W&t&Bib;Zka5B;DXAdR-BEUByoA;hzi96fsuUiP(I~Yz|cn4po7$!KysRYYSlQ zR0{UB0xx;(1aYl_1iUcpcGei&TRUe42j@`O`yv|0>$N%h(aR`a)asTtWR^CFY9}oA zWvk~Lr91fxI)|e=hv^)Ge0?T?4t=FF61mFKa9b%jLF}y@PGxddK4$?+Y2mQxP#85` zQW_(4#48BbY(lGZL#A^Bt9*mr0QnPW*EjnVnyb8AV!KrG&dsX(zT?>%#ichr%5G*u zW@aO_+A?ay#|c-bNAi!=MF=;_%p@vN#N=Elt^3(@=QHYk9YYx_u{v-AIOS17^+jy8 z$3n@v?5!OoYyq{7?q^q>&z_>Jq^-m}=1R^*%sMLz8iu1AhDq((r)i$mt*NyHRmPJj7I-!$C(E_Ym6=G1Xd9QJfYa8=}SYxd9;bt8-=oPFRo-_KBeAq6J=^griz#X=Uv6xJk=x7z3t!CT5l@3KjQZ27TLd_2 z?b>#Tvs*noIEf&BPhDq%N6U_z@NwnPA=^)LHMYHNcN>!zJX=U~QqaZn10A zAkqI7pi0-Z-9o7u$UPP~8{Eezp?$Vfh~_-Mnm8Zvx{b29ZMW5fBwN&r`yHB5kIgqq z%`onOHUy?AZT1DwylG|1%R&Zc+JGS)_&YZwSI?i*0*Bx8{;Al=bQmq%72*83Zu zyj$*HaTN;?vvr+zIL=i~v3c_vUIdt54626RtLAt;m>08jvL3-YL}@*=+geVVz2enT zKomFST$8Ecu4&kat%RwwmkCsO&6|5~sz@_FG2xc=nDeummO zDpE-~hs~4rqitM@ol{d_xY}aVq5zh>?P`;j&YE(=NC$~Y;k4^$KOJLjJMwwzDY)nq zS%xE>?>UQTlIKn7vC8!^F7rhYBGm$M&ZZ$|C|58dnZT!omX7hZgPt| zcfz=A5)t~;S7Q76`W``V;^Z!O?woPiJR;OvM1uOpb^*1vL|rF(#4dvDiw-;Rm%1|r|Sh9R_9l?4{X?^XBZP1A}XFTd>Rr>(I z%LWjqgz_3DZDs{o1Fq!=Dib%+h#1g`_(5dc9B=5D{W1jy4B+I@NDAlxu3E#MJaG!} z5{PfXIf_p!LqKbRne3y+k4QaG$wJO@s|Up_K|tSFKwj?f8YA2a$m@6WkRIhI3XK$ls?pNg8{<-;Pw_oK zL6Xo&Dd+$_t-UGkYxK*_yDXf#jI`ewY4b^9YQ6-)K-caBF^N{BL?20sjud&Pdb0F?>A z>)I0_?*&l&#_zi!CnN11@-8i>t~l*?aatC-ftn&P9@>2!)M;}c#7hqnC-p}eeWHJ} zYKPJx5*ir=Rg;}7E#gooQj>FFLVwPI9smJFZR#hoV54%Q0pisGiF5jYKju(uKt~Di z(EpBre^wfF(Sst6R8R}Gn>Mh&onfPc!Q90#I!S+I1F1yZCJPY*D^Vqx1*~5p6(nW{ zt%Rxj-_i8XD!ldEBRcDsK&|I1h_?nr?->HZn;X}2W0r7mhp=$T zg23iuT{(;#$%tDFf;)5vmn;hW8Hp#?r7`!;9F<08e~7YwPHq;Srydy|I**-6B}~;H zqUxWMXHsX>{sqS!=U!S)FKyD#$UNvzO|{^s910nr4mPsxW3v2xG8PY;_k zoqor~$Eb~Yxr83>5WvP^tiDHZ_MS4xVHw@>F9=l_a(~GF?I?5ie>=vJqNKjUx8V9j zCFGy;f%baQoJq*P-b5)ICgcyfg|cvZbjcdfuKTd4@cumkNKBI!e*Wto!SeHzpTsO1 zSg6O_(wt8WZ|uZa;)@v6J~F5c5i5ze$KtGGO{L(bgKGm{I-!RP2N--jW>XLboyG!! z9GuZDe`D*JKp_p_pr2S=Yz3{ZY?h2{Rz5?1k40m6+`DG6e%5$Ji{DVP4`@jatb|sE zSU((qNjww=NmGj(N^fe%JE8ATKmJ2lV=gT(I~V#vqE;9r@Y`zpnNIvYbRXvTSp1Yd zk#C_t#d!>$ZJGq#L*FUK338xDx4efQP7qLsKUx&?wz>bVL@fbGfB+Y4E{+8T< zNRTB@4gMAyb?d5jI`4skYTiEzcA)DM#3G|cA4KT8lz}{ezP}!*i~rkhfb6&3*{Tqy zK~E6>BCVPeky28TpdKBjzeuz?%cywPck!$>T8q{IawbfFUvJ4j5WfQ!s}R=IvLw#3 zFxJrTIh6Ij<}A^oPKQoVYRYEQ%>jFnnuE!`zP z_hXi<+nFf;@6alKHVo4rg6W_0)?}h;p$2>e${3Z?irJ)9`#YJTHB?=ktCZ^2E(^Qo zR?3~4;G6)M(ZgGf#0RAb=)jGDC;jqM89i3NV+vKYNk9ocQ|ZTM%cxk^cd@KBnq>HM)I;P2 zuo}S{Uqqw!kw$HZ@YX2K`*)H~fC48#X(aUS37n-1bILwSqV@nNZ~&A({|(h*P90F= zEZ1QU)nJlMqiLE~EG9B-0P&U!F^BRo$%fHv^jdSw=-y08WW#X%A-MiIL0R~UdSsWN z49zaB7)p076cLifuw=rcN5E4uu%sdCeAVr)({pJ85rFV)+zx4xu&Czi-woz z=;5FM75=)Ja;aOXfc_Ame~w>P1X528FclXHa8cP>UyavZulA!kXH@7X?Nw=xFfY$;MD8Y?Dc*`ExLvGk)ji?3mfx3ad_uJG^<;3lu zlSKQ7hRq%Oa{r!0tr1AT2qeudPV<{*AM%Gsbmt`RGpQYs9N9m{b;q9aFvIz4GH)ul9m;Rf@eee(%=zw_oW`8d>>ZRN1FKp**^#|6 zt~}1YOjK+dSuz?~EUasbsT6T!sBPcw zAV!~&AE2+cCOVOlP<24ix_u8-!vK)<@TwngrJ5WH&(@tNMJ!JPRpyL}RJW$7s> zCPHxBarHWJb9!_#v-DB57{Rl{v$yqq*KH@Krb8Xf=VkUGyGv(snY)i?`#X*VN9JY4 zs85ex-uO7yUU+%exYn<^OSQRpj5W<>K3X34@N^g2s-J9IZ^_&}^*W5+JofdXGRCws z-W}N#%e-&mR+zFIVn=g-e|}|kcbog}^|^EP&G8YsH&XTXu2FMSLz&_(0FciJ{4e+X z|KVp6lb@_Bk6Dx(e6@3Jo^kmxA z5tF$*VE3tiEqCot$>)P1WcG3(u>jg1=c>L_rOeUV70-s#A__bbl$nQDa*c`~RZ|Ym ziZ))=r>VGXdsQTtZYn$TUKwoXiWy#SUl!OlZS52mraRpo+v3V~wW;^^cKKRgTeP!U zuY3G;!pT>Er0=R>+sDSX-(@m4mHUK!y0rg*hh%Nx0=!;cA1 zhjYDWZ*NOy?y!hLaz#8bLwj+ zHcV3o?hwl~%*dqHX?kK@TxYjtn8H7N@ce@C_`)C|cXW&CMqgXCer>f&xe=b`%gL21 zqskm%DdekHePgW?QRH*pUTJf`aQ&11^o+Cd1>E3k2Y33`&PHCfAu-L!;k1b<;-r2! zcY=C!hqB4nVsyGZ+IUxNbhE|y+EC1S_So^2y(|Bs*k!fl#$bTU_4$h`xb1|c{YClL znG;VZ?UGS%@x1pPa46-V@-EUt*xkT};&?D5*S3E6B(KY~@%r8wY}EMT;FM|iN4nVN z#r0yJ*R;Dq;?GjVPPH_}$qXXRw}j?|z~z;3;&F%_!n1Mza0k(`;a>f+Fj)qdz_>d7 zbalz}rvBHpoo})5_NM+%n=L0Oa=Sx}CVFb}s$e)BHw10O{mgNg=A-Sa?sfa9j~fRK zBAxYDh^u*{{_6V9_WI%H>R`v6RbQCW;m(b=ui-J}&(WEL;#0?;Gloi6?4J|*_|jkN z*5%BO@89JjxXaZXoZy~NFYzj+r$PJsWfkXUTcW^3xwkfCk<>sjMftmvL6I@}@qOTjeYY=y-6?fSlw^YgK)WSi@ zA#cwnzyDkw$AF~HLZfR;nsCJi_ACG4Y|USmH3&Mu6U5Wgd-Q8ZHB?? z-oCo9H|=z~m3;dlbqj7`0?t8JFd4&n2&Y9jVXm7+_(Qe}HO(iF+ch<7a?MOy1r*AF z-Ku-%5Bg)+m-i%LLdEZ`6a0g$upztrn5RVVu|&&0D>^^|edbYdnB{K|zS^aU=}Gyz@Zq(N3s8rQ`k z;;Nvol~A(;2Yt}Ej338<#Y7`If86O4X}~zhzxeq#9e~T{r%_PNxQ$bYjqE^(9~>;9 ztIcv_W5gbJa73r zP8-2#E~|`&SST1o0HX3){Y`Pdl@%(r7y$R;hgo2nzg93T7<=F)&_VQIh^2vp43TdiLL04Ua? zTkwPRvSJ%tKnRK$W6b>TMBqL4BDI=eXJJs38PnafD!0_q7BALrn%7Tz=$R4}7z$eq(H&G^~ zl?0cC#E6wTxuc@j&81}Oe!<5@z^J8m(lc!aAM-{_2rYY&WQ~lv85q(s`II_Fo58G4 z@%@_iAV6gOBWKqB(FjUd_7T24t1whRPr#q^bwH1pMqBYL@C3jX@g__f7(SeIhzX9L zB{~63+ToVaHroJy86x8zzfz`eqf4Dt4&Uy8vP@NxF){=1Z!z6G(sWCvX5AI+QH$^+ z3z2{fbPrCtMgnM5oB?)cyyzAhqywH#W5r`lwO18*wqcl~TXK)r2Vj9SaS!b_2WwSQ zM+SHyCF-hx6pc@q5LtPAi+I{!NkC)8z*4X;x&i2=xed=9B4`wVq>KFlk5yQ*^BH=4 zs{uA3ab^+_^}G*%?A_8hG1gZp^k;alf0v5OBT!V{l_iwHEhiaJh5_7q(*Q9SKm#Ox zeGFZE?TS5sbwZocT#B)8m0clKol5od28aKh(Gw_x5Kb4mp8%H0pdHm6Kv|^>5Tb~g zic0BSYfv);y-d1*blURew@n!IMKW{NjdqNQGOgZQESIq&8PmSVHD55qB@c~1z?=C2 zpjaI6EP3dSz@fwJ2A;)}AOTbvx}iw0%C@lcsemQWk82ESpJI#k(JCQWC0qiqzQ{XA zIQ$JSmmyua5B7@uFNN=v$f)-Lc0MUZ^E)-*56HdS<@YznU((Hlz+VCod^%{0n0kFU zGkbA|-!OctLXDjdhC#SB?@@8I-N!|XTAI$p2Urm|Z*T}0^u4%Of;8L>~HR2vzU{KERb_$ zWlzh?(TB1;+AEU2EEFmpCmz7zXWHfhLgX1hl?Lb~9%tDZ0Kd<$0REx4_$A~slD%Pi zuVHh`%2e=~f?a@r@KbhKNK1$JZvtVjFMc_x$|q^}m4Fq519z#jh;97Q5C-2$Cq4xi zjT2wu@P`{dMI|w}22e%-+RAzsRi27QT~=9-ZovW8tE6tp@Sp^Yi2GF)>|5G?!$Q^N zdskGlN-J?MH)s(LR(Ia`8LtoGW|>Z-%Q39U&*@^G59b9J3GuW5N?!U0>&Hn=?e3!B zkh6qZKxhKN1R7)n#1OT#OcFCpXU189N+0H0!psg3|L#Kof1HhSmTt(Jvz#T_@Kh)( zreCnK8HhiC;SAQS8?sl`tN9iMKmO_$+>OACR>IY7~|#ZF>x5%@htK&yjL#W3 zV&q0rSmtRZrL5vKOQ}cYHE_wkxNrK)Ij9wP=fuCoql3$lSPPuOw1Prlecv2M7 z_sv4xlpTa`J?@~nC1&!OD8_x4D!`;_otclf<~XLk+tMia$(<9eQ#bx)&e~hNRy&86 zw=DNr7}?IMj@|;x!U1~X0F@v8s9+6mK*XNJcf%O%iW0)H^u$3fI?RHqQEsGoVS;^t zSeCy0P(O0UIj5Ke5PEMqm1HxN5xd#`8)ffVc~|Y^P<|*kEt>@;IZ2q2BJoXYGI!dK z3JG50NMHZXeS?*b)JbT&yfM#4&x2R?C<(Wc#OX8qf~DfZ{OzYGdyx==s72Or1`G~b z;=2zA-KR>4a6c&I|3wjxF+5X4f_kdt2Ycis#xKF&q zfHoqYv=4{MpWimT`~Jy+tZOEpsZd=?6& z78uF{shkbtq2R@mN_2(-Vwa-C6QPPo8U{m#nCQo2O=Qr{55hx*5+mR_<4FsE_IMWKH)%ZaXEjJ0$x? zDwRy)FLDNWz?y9^heE-i9E4sQx>Txw5|k0gT|F{6K)k)*_&{vv1}HDmM#iSy?T zh92$`!B&hU0uKU=lX+L--gmK*@qJHYqFqQL@iSJB51XTW5QR@O%J=;)sQ`xbWjv5x z{ssM6ko^lTq4p0FDW+R+K&W0Q4~e{JLgYdRSuf39m4_Wfc1i#$67`7ESh=C(P;XO= z6vZC`c~G}IcBTHIj}@Ve<1JL}F|5gwi9gD9SwbTP6rtZTa{#exn(-`=N-g|+nID{0 zvIY4t5G+=a({l8&X0wv$~h_6}$`HUGmma;?nToC9| zOo&9l6;6{$Nlr2!*s(iU^5D)=_7I;@k6$by-htZJ6pJ3RdG1KA!!FhhvTm*yu1e5v z!scpjDQ2sZ@kY5OUZ^!`=BfEm)hGLfYTlo?t>NodtcRH&3RSZ49-@lxTV#|MfWAwS zHJn$Ir=yoSuSk=nGjf^-sNT|_Ok*`?7jK?xl8mC!&-?xLnEG zO(q_1ko%US9TO@(Grj{aoEUFNH_ot2L%kb7*ytX=d|X?!5;45u;>vbpQ(@4sseqg4 zRb0nykrpzOTBm=dZ#Qv%Q8-blO*($1C$PN{d^2B+TsgNn?rEqsbT)2)Cr*!)>r!Vw z>c=#_aeQWXD(3vv7e2W|MD4rjT4{MPt03T3;3=#>;v8|~boRrxgXZh|{>}5gBwuR^ z#B9j~+Xt*+4{z<+Sy#_Va(7L;3-e=_;kyG>RuM0o@R&OthPNgNrzw2BYi=5Dx_$JK z1)>>p8+*vzB5BX+8v#LqNqC*(C*QYC&xX9G8^l1AgErHZeO|*V%vbOxyOYnbA=NZh z*WnfK)t5IRHp5=9tMP4T)mQ1Y!-vIy_aTPIi93U)#`R*}Ytc@_y@kt{rqerPbi&)} ztAiB?yHgz%ilXIm5tlR<#Qva1Ui&U(#Ae5v&#;Wqi1vOYzEhsTk>ly&Aw?!-rq4S> zG4lBK{f+8C&kfq`d#L$$v~UR$03a3h>%)KUH^Kj(&%8UiTbuv>u=}>2q1^-znfGjI z*<}+3%K%}J7AO1Wr>a5&Hs#b46B6bx8)LCH25Y6e&PpWEpd6LSf%t}RwN0l}i!YGR zw=(PV)M0ynX4Q<#A&qk3X8RQib|Uew<~k zWZ;h0gQbNKzJMuXZac2sABY!A7fsU8n%VZ5v>&>1pW?az(K00qtR!)f!qs&1);Y-n-x*(c?ewP83o=U?Rgywko{z6p`1V3_uk%O?+I=z(LH9 zucQxnYZ7>AHI0a6StDr0gF4L5uYjf2P=B*WjG|YFYKhOSSlq=PIKI=U2=q<_RcH%sIS&J7$2=8`*ynBhgr*1{#_P){#JkurDEVUE zIjT4=3fQm<+-O1*&<+oOV?;-7BPBe6|2k-NU#4=#rSe(fR$RWHO89ySGnwcH?)Dt{ z?OyVX*PtUt0|3Y=|Jz<7{L?S-@7|gyjTVhb9-_umcK$ljp5U%PrF*$(ZOylb@kna=Nl%<@Sr9)qwFi8N?km(Qs))mxxaAW@ ztU(gFf^r2#aS~>b`(>m|zE#owgEi%d3<7#$YNC48sO>lym*wW(F_9c}^r=M8*V2~P zDV(!5z7aW~#u=yTUM_QdE;>w`)}4Zjuv9@}LFOP}>!pTaWITcq#QQ$ev<%aTbLW?A zBLc^ymBKh{VO&9dGI^~w6UsU~Ut3SKy*r0JRq?K+RzqXzUJ0Jkj&dHYEJM-FXHqTH z8}4|ALOu6ajixr0MZuY=%Sr8_jrx-KX(<`~Ty95pXo5B4z8{3tT;eRo#?Q!o?pRFP z*#dU@VxoLWo_krnFS#cY?${ILW*2U>Q(47Rrpnyc?wi_=$^8-duEgCjk%#X_*!oFc z53-qfGsHx1f&H1#$n2;`!)zAAoEzz9Ezz}iMF!Wudt&7HEtm38{VYO zq+WZzdcn>Tz)8_)hApz|ZTCR3uzkRFoFb;!wJMq7b7%8wyW(e~3--=w#$oMV8YM13 zmPG^EDvK9*qZ)Cx*}7V1fOmF~=*IBNa4V|#nS6mxeSIT^Z9{a*!{jI58tPu%Ge8^H zFm2F$IwT=V=foIsL^j@6Ps?kSEwNUt6MNWI^3d4gJR?5-1%B0|0Wc-j`O_}9MkyM( za{Jf~mDcmzFOadt&#&ZxXzcjOPs*4~D&O3D<{3CRA7b?WLT+JTTNb%tf4Ft^K>g-< zFJ7(=U2;fhgW}Pqx+YXD*ubxo+Uww&PS(bf2NkhrD@;n@BTG!nVnuCl=8Fax5@)SWPm?Sqkr$3yloo&-{WsB zr2lmMd(+`^ZHoVN|9ijLZQtFW;fSiP z|J;T5PoBT`Y29|G{TT@;tx!DwU$5Ff8UB77b$jRZXM6_#X81q5yZ*`Y_h*U!%>tzR zKUsd?WBrrm@2BYPbH|?%_V8cP{Ph&_PuIV1&f8=5&lrC6*8={1=>F6B@AY-NfB%eH a!T)nNt0{spZjEqIe}Yi?ekgqV-Twmv$#cd4 literal 0 HcmV?d00001 diff --git a/pandas/io/tests/data/example_file_2013-07-25_1904-dates.xlsx b/pandas/io/tests/data/example_file_2013-07-25_1904-dates.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5889593e62908ec6a76b8fde6388823286191ce4 GIT binary patch literal 13246 zcmeHuWmFv7wr=C@uEAXcB)Ge~6QFS?xVuYm3+@iV3GObz3GM`k;Ql)M$UZy!yf@ze zd#guvj~=V%tTpAEYgVnV6=fhGF#*s3SO5S(3J`i9FzEmW06;vk~~Oonj8Q)==cA7{15g(MdGM!4-2Z;ZPEiuT$38`M_w5ncqD%^ zBl;DDEH4H2CmVNYc;9s?w-RAq%p|$I#VpTLXI|o{`35<4)G4==NS}EOxaJt{R<(l z0l~(}R~>VcDnQ&j?DGMOro4rrq=+2qzT&2~ul}7&DPHz6(fKGtdIY(TOvgF}rScF6 zLKG08R9w!B$fCCQr(OAxfX_T)f&AUcCq(a_Kwx}*g#al21*#2dEacaq{F4PC5dj2M zeFqb3M`os9@Ba(Y|G~KY57VRKC*^xt5C_gA+eCIA4{ar3a|>H#C0j{ViMuFH-!w$$ zQwZMP1VIEy8!b=W>`w~ltex&zsJJ1x$pCV~4^x7NrqG^Qi>95|fq0UA~Y`Vns&Y zxo%5X7*Cio{s-Rs+9q~6J2&P%Z(xdilwcMHh$%wperI?FVKL2)-}-L_dolnh@X`1V?iA_ z3phjFvz@6TBTGLO3u?FY_KTHGueCvcyPe(9o}0HEY|?&(erGy#`}{WTBO4_#iQb8K_PK*b!?C0=Uj0ZZnM?+_0Jk>g?reJSG|_sPaq zKeOyO2|td|Rxkh5N#0!_9!iKV?*v8r?>X~1PVq@%UQkv!Qn)3!c|1S>+{_+wfo=1C|A~h0X6Lx#`DDVlp~GBkVxz6T zDCZzZ3B0IQ__L8vr^}gwqqNtCxV;Gy<`sNa)#zZTn%(Nb@)CxS-vyM7uz8BqRyj2L zEyi*{x4cc}9+-@)tk;nr!a4x5GCrzCEAK1269vPnFK%O%2~>6CA$ux|Fd+@Gz4!Xi z{EUN|4AkQk+NRCpFvqF4QX9hu5Ay1INPgC?);j`05oJ%f_D;6YcwOXa*kv5{QP%sT zN@={)g~55Dh;Jlx+3O7`v4d2YP7})Sz9k6Ur80jCr)HgMBUb*@13kX^VoQ6`Ya@!E zy}qWZt7h)Zxz+E*Jvu zJQL=F5k*wqlQpaDLjWI^pqEo(Z!w=e>j+J{c6aS3ci*Ky@|F^qBF8bzwtxrk7~z=dfuJY2zqW8{rBoMGzt3?VNs9X=SViG@%TtVHpQ+w0T!z2*~KX)OQVcd2l9 zmf`zIf^QXNmNq~2Vd|OX8_pPZyfp&3`INO@jz4+7;kC{0oRIuE86?FOYrvl;$u%T) z^3x9}bmV%IW#x?wS!6RddU*r)-#k$0B;gec4FD7p{JJao1CUPUCN?I_f4u);_OX_N z9f25TC;0`DxTl>FwF|z4bJ=1Ifw^+tBIDu5HsUjRzSg?Wc+}TO4H6U-qC&c{%^w8? z3)Q!j`Q>t8BWA0+($iM2ihK3gZv-xDMdY)Gl zm>pw5Sl`am`;vr&A{D&NYRXGxV<%tWYIL;fTD2{Q6(2^T`DRpZ;Rv5>zjhRoyQks=EQVEpI38w11W^VG!=~EQXKiHu8mldh31~UI)YH;&UwAYP zwS7>wNgJRvQNLzaUv;u`h2#fnX%~*1J+Cvezzo)5)G8UoK1aWZw!G%9N`C_&W5M5FuUH?A#ya6X4Nz8FXEnEHO|vOv@ZruF0jx86Cg zZP)Au0go45Oj++@-kGgDNs2MyLcd7>ST89&hl0-|MPVKX_82v=FOqp>NAi=f#~X?n z73{&K8N3m0j?9nC(RA!ehr}^Gj%Ah+`HUCekrQs|c}9&pZ76g$cX{@K&o=^eI{ZBU zKHU38ftZim{VuR@mtm#?yF>D;$C+w<{+Bj<=Eyt!j@P^WZ2od+VR%Lg zkBj~U{cJw>i(TqY@5g5F4#x8m*f-N*l1}M_`69&rwgW6xJH`}DjX+%BVMQ0|wE-?lXViXL4juCuCn2oNSyUHg`)OxC-Z$C^8 zznWhXXi+E?;BcbU^8f4T&dG7WcS z-q%nrW-`~idV(#Q(jH`ULhPEeVcH-XaTr~-?%S{Fqz^<-e%pno9WvqDr*#P3MEIO! zjNqb^W1_)WcIc;lX9g2gsi|}j9IK_XXwk1_)@<<3%MNZjjmvBAtbuC7b)IFK%)QSD zGFQl1{Vzuu)W2qT0 znMMBrZ`@gg@wkK?i5iL^iOi9%?rtdXbPh@TYf`mMl0G>0Q)dMXIAdw5zQMkmRNoBKuN>VwVWuDAqNWvD&PbXM&d$ca(w6K!csCr=_%XU5sD zU+1FDi|Dvl9FiKs$EOHov&x|ZzjEX@2z^8KPg>agR;|_R55x9$-ld6uWChq}pT-nV zN}Mn&p?y{xb2uQSRgs7}<*{h+t$2{FLVaU?+IZb>**!zmctNv=>2fXIjW%*4IK^-2 zz{&U{E03QSnqR$8uystEdz`LEXOu4cRL|1+_Tk%$Oz{ zRvj_H6lK2reW5<_XBrt0^;67L*mxf)^zD%%l=pyYPf}

;n|jkNSr3M4gP80{Rb; zco{^43~(Q{ZJi@b)2ug(NOs?*HU+rl=oB7{e~l~$D!;_5(uQ2T%3`%=cMF_shs0V@On(g^Ja~P#&w!LLZ=a|d$}Skt z<5M6kGz6ANiQ#RAT3+m#LOHA~OqB<}HPRQsM80qq;v2E1J zN8t_B7vZ%nmO6`IaaJsC`spm%md1i0n3Fj|y4y&lH>lta1iri~ZL$G-H0?@6d|(@} zx^WQY5PP8W9wKabW0_)I>>;BQi0Zowq_k#=Dia#D8@2QuiqkLoOspBdyFYmg8SRg! zjZO^rkFvX18V=68SeKS|*A=T^hERQ%8sHr8qNewm+{%C4_(61M%VH$`*&rZB4O|3; zkHsA@Y{&JvP{$Y4Gyc7op-_qCeFcp~!vFwC|LkR)+^tRiXkyybbgY(HFn!(}3&6G) zyG$2Y1)NM%a9}nnNy2z0*?dQv>z8y$u|DoRlXS5Tt*S8k!rM2^C6V3XsOIE|O30^T zlN!&O^&tpXDXlzUm~KFycxISGd;nbwwTdG8A`agxq0JWu&vVDAsd$#mFP3V+)9OeK zHPuT2iWR4<2%WufIzV&rkdCFHsDR}(cB7jCu^L<`wU4n!ddCh7O;mv6nnELwD%m88 z&GSkVH{8ovU#d`Q!2i&YoBAGZ+3rIZLb8>Kss%HENzr83;URwattPZzqG)0Ke2y;1 z^uQf@&MHzlb!o@&Y$=IfrW$(M*^e~)SlN%UqF=Y9$r0kffFH=hb1M5o?2|3_#-zz` z1sb=~FzyUoTmt;97to{PpProzRWLY7$|)WUhA(=VbS^q z!3gx?xcx!i!$g#o@#+_oHVt((Q!1+~R-Z?)RbP)vIbPQOsIAuPTT~IeC6gBrCoR=u zh?gPngy1($yU(-Ws6WbfED~-^u55VbPwZ4(9zBEZ<~)jBy~_E-svXG}kO{Hd87dZ9 zJLr3Ig?aa!rRs|x*Rm(EAg`C7VK^>MJh29U^A!zmuJny4^3_v>ElfT1>e9ab;<|&n zl#q`6i9bvw!?+F4J5qyheU;(qMajEWw~m}b-lOyF99wk04uv}i44jG8gC3ZS=gt{D z8hOsABeavT-<9q&uoU#6hd3OgTz#gGz{Pb^WD_}}bf_L@kqaS8`VDEs&tjbO7g8*I zQH(2cC@q@3kCRx5=d;bb@xhQ?)KQMLv{2Bt4Og`p^=_vEpUn+kx^Xx@_umb!KJ+?x zrc6*RY(99r?JS5KcBomC$G#l*Xq8LZWLSA3Y=?Y3N4~p-SALgSjTpKh>^kAH;~I#V z$7Rjml6>nZ09g9wM@X16ZMuI2)?3u zE2zB)^}DJ#b~ZEE^=Et^;ipK7aC9%C9068R&{5c&oh}rEu@_CP`icJNPR2J2F~g7M z2bdP!Zx*j-5m$GdeJXz5yf;;=ikzD}y}z+<@_q5Bd${fN@p)fSCBJ`ixq5SP-qx!1 z+`&`n^KiE>;C=Uz^Toqs@nk~7K4gDp{BYCD$KBW4yTyBv&qm>>ck~ob`xIrc_yq5T z*QcGoozQie$Mf6FwzC@M>h+AvYny|an{V3=Vd&%I^NB6@ijU_KU-qihr`Ev}yt{1T z8)jiJ3GzESJ&_*L{`YQYYNd_hW33~b$gn^lcVF#R=<6Uy(1d&y#c0>w|p=M3?3B{mbdCg=g7JTEF;N4%sxdCYg2UKl{r%An9K!zcg9pOD zYRGrMiNS~P$qbWV5G^o-M~oUu73uTSr`|$CKm$z#f*go`zspo(o&fa>Tz#PPcFa9Rj?h)<&bRXe@@SWcRNjgr`qzQtJQuqZ`A8Ea-X@;;tvtUu6}jk zhBUE8yEP$iFF$X*J38D#csi7P3NHmEqOvYw@T--aM`uO_282SRnE+auUO-vh0-5C= z`blIbml`?XljZdu?}^I6PLERMf}B2-fEW?!GEXg#`+aw~HBABpD|`lD81lq?N#Cw@ zW)WxOc*}85c3#96B0~DO-lFO6FleJ}OK=mz_>^BWDrSfjTz*JD&<>CayEqw(nadZ} zHTA*k9UYsH5vfwx;Xnq6tyd5(yR3!?j4xU>+6%I~=ISA_)TdyQj5T2$Y*JchE^;+~ z5ws+Kk{(o`0^E`Gwqk?v=C|j?ZahhjGHvcBj$Up{D>|VyB5Q~9XVIB4_p6PW7teML z$6M_raPFYv=&*#pZ7=Xs%E7uQEbB0saFwf zF$}(YyQ?!@DYF>T!ci4THyc3qu0pZNTv10IgJDH(@n)oHLLO%j!r$o#y*Af2+Nua@ z4i_yDK1n9*Ea)agGuMhJDwzW?P-8`u{)b@t||@`(=^a>Q z<$+8&Xqhbed>mLMbyp*K5JgF04M>0rg5XxF`X->?IB};g#nA<~Ocnz-iAG8;I79Zv z3haitwMT1MAl^x0Bj*%DW|>H4z>8_AFHPTWM&2{}&OFRgP3If+MKjL!uOYL# zWfSoMCOH%A^~|bl{4a zL(3VK$-HyISr6Q)uSW$a_D570mLGDk_Fa@Wl@X^JdQlOOeTJnyk!>3 z$;G16jdH;n-o=jN1fxtWK9i){GL%*)u^9FeU`4qQ*=Z^KMyc5qG&jNqtq0?&ZqBV` zUOGOo6$i>q7u~5fsVZ~kgQZmcWrpC|AAVMEik_wKu|O$!ejDV%;V#Uh2VVS{Qn3=X z#NM~~3}kMOsH(g@z9XFFVBc~I;%F(zPxO$Mu&$5J(=g4~WqF6kNa01%QQx;QRO_xv zcaMtc#_shrzOfGt^LH_-8D@XuS5zQZ(-ly%cWg3+OLS+4J5_2$4my~{J-q^j{@m>g zw9ZQ^=k@Q*1Zka%gK`I{l{AL_b-Xj!csGCA5zihEOql(k3TO-9g7pJOP@rDCTNXSaSJ>KL3#FE9OlrRA$sDZ2^|* z7t^THq9I{q4CwYsX2h1)QLikSgeQv4Xl|uHa$i8Kw5WaZtdHs8&(Exi5)(`*RKg}w zv{_v1_(|7=q}C82{74c2iaCq-<}W;Klc|M<3%p&0abwq>T0_XpoRUxJDm4vP9TY3G z?X~gFya%%_g_p)y;xWvg%F>o})uJ<*5`u-~;WaUCL9riAD(Yu~%h9tCSfVwvZ~BMa z=^|Ck2*?7}QkghVMPOglLl)%i=9*)OI}DN>PCADMBxgv9Vk`w7yp0#pA~V@Nt~D;j zsAUh*`89vuiMj9BiViob*=821)a(%g8~JRQmY;qs!^s;l`*Y^%Wb!%l6pv1W;2JWG zYkMkzE?}7`c@FZ2{Rs_fTzOqWnwVfV$Z9!N_q7E!A3p+Vx{G{08>w7A=Qnwi=(Lf@ zUpqRQP|nTk^V)Gvv!;k;^_%rUuGlB*|LJ2q@yjUjhLc0MlW0VZdKD+fGA*(>$2iMH z@tCZi5TM*V7b%+{jv~RS3@fJJP_1Jx+oz}P7|$<$0>r5FV_{?mOu{ORD?czq*Zv^U zv`V@MLo?eyu2c-dyTBw{iPNf`b<_gJxV-xnD&<1AUVe#Irvi4zA}#be$8gJ~h1**& z+q8(%>7bh&J63KY{)WzVGmAtq7;oaDXYGA&Kb@+)eCm1>pe08>Us_Hr=<-J;MMkd9 zNm<}44p>r-!k#QpVx8PxBh@0wl3Gq13meI{)`1}4mY=T&-DIiDekvbS5%}UpR4Y1? zCsJkIBw=#{;Ki>t*O+w5%VhFb8J{i@GFNS$!$|vMhLbK*fD_A`{Sz67U)EZp;&arl z5_4?BH0PET(VF878;U!U=z(gBIFfFDkgRRacMm1&AF}2ea3jmZM;SWFG)P{Xv)I*k zAZ-qm!`;NLE)A`rZY}Kc95T7kRf@bPxA_kA(x1i@e&JJyxQF%ZIFXEVjv*YsS+wD) z!{7r|r`HbVHVZQP(agN~00+UJRVIgl zyI^F7KNwG0%pd&*lxA*K?b!_*N@`eMct5a z*>{>s%??qHv~9J~ylHt`;}Ae@G{usi35{o#QYf9TSjciALG=sInC(22$81X~s&%54 zm}GOIJWt%05WMnp-+9pG5f#l8#`6Xll0Z2l*c-~Z3Fh+?MjTLvNn6nH%P&aXIYL#^ z?QXEf!?QHTmR{a5Z#L$u-)86Ojpt3t8^x%M$CoM%sWwkxeBb)z5M%fgZV4{lYfu#| zR3_J#a6TS>mfVv_%9|X3?Yc%+z+=l;g@zu-!kw!fQl^C|wl~pv`ZCOnk=(tEi;8qk z#sZ%Td;a~s;u@+~T|pTGBJa-QgLr4Rgwgzar@m1nmUUFwR;W#eb~=QPg2OUXL?5^P z9)$T44Im%*sWXyg&YoyNTml;Tc3(Y_F9IY_>4m`a z4~QtRH({Og(18k&K~{=&!oH%G+OUC;xVkS(1;H?A3|W^r+c3gpxuzXLt0+H_N;RHG z$Yj0|@{)|Dz*_1LZB&L@T=r42ujiar-G>AzCWvb)5VT@qxAcVILL|IT9FE2?*-PDQI9 z3tdJt`Vd-GF4c#%Zo9AyMxKxNvAuLyOu^VzK8~8q_=30SQb+`|b%rltvn^mB!beBY zGy?-*Z*wgrU%{6kzkEJURcMH-bYOW$;rOMJByLkb_hYw8o}4kQQqRFQIWj^0h#>@} z1aVj6TQO*TCI@>JHpS>LQ;H)^0};j@e9N$$Ax1sQyFClS#LuwQe8(TBhCb(A6*BCI zuD(ZffZ8EYb~TYv*`^oSX2?*se;sHA-@AH8vO~D|9&#xX4?IS)^1Cz*S)5qv8Ds4- zT-gsK8AfjL7&><|W*`eL4+Yg%tQJlfRxH9fmwvB{x#H+qa@do0-5 z#GZR{WxIhjtv||eSIqVB0%HYOx`>eijVO(o7ItRPFt0WfD zL+~E?;aQA284)_DecFtY5Y{eOcM~i4Q5{kApqcmWG(ROM7MIX*QbGfRRlrSAiUzRp zIyQo(Q5V0JNb$U*!0JtI9|GMw(81h%5Ef}86>@QGSh6O=yAPTz;tVGR%LCL=P>YFGtlfx|_zcg!e}}6zfMPh)2sP zw$Jl|ZjB^rO#ySGALbS@fVf91Hg^+^w_C%PpZ$h=*OBR7+H;*$mV2eUbG z_d;h9e<5uktL+s%FOU*BMIra$GH6=6)p2VmL#nGu-Gm@{h>rSfu|&UENmA9fEfFXhMOKE3 z-{grHhi{f!gpEN3eMF;V8b+N^;Z*Vq6u89y8W&em59XEGQ|chz;Sc-(DVo(2 zPDf}oj%g%y>7pwHW}3*lDQa+puT6wpVuvXhV7mq`Kf7hn#0r!_)K3rOUo(THHa?Wf z2SLv)j5izuid+0f)>`)MiZWxWzh_UC&=>=WS0>-rf+n^1Oq1r=^5}zDE%aV${Z;;?VjXK?Lh=l%Q1PHowd9^)5wbZSp&d?Dlk}?Ql;|%+r=A!Kqmw z+h~kR2b&3+zjQ__vo8@Kqr`25mI(0;?zzr27FEaXfQIpI+dsm#)Q-OhD==7;Cd3Q6 zXQEt>kAMxSUde^qo}nUJ5E9JknNq|=%V-M0nBx&=!t;pU9Cf7$?uFLMfQID>&PPS} zy2O-EJcqr>AFCdvsafq{U2inju+IZ4+h~Qp_Gz<&%_?_tTGgQZ$_(l8-DmaL#`CGg zmtT+X^mMZvcLRF)@bE{+>ClT0f14(~(?CiuUPtlJ>os6KrXjHUk#-(+={B_Y`zx4! zx#7g4yX(u1!*#9qcOrqaS3f(SfFNJa(@ma)V;qCkiVw(xmmgaNs;h{*UIxMNiHlSC zyrjb1Yf(D(kJ>`3qiV759`Ag7UF*1C9Lze^w;n=YrK@9JasKN=#R1$g|F@vsb2`u? zcr*|R$k@(E(ZSB%k=e-3!Q`Ku*8k=HfLs?8r!UjXg4w?g)hROUneId|6Wr*jcvC>4 z`SA`rzLjzDi8S2h@uuiRogQBJgYCrE&J0hLikLDf{%&^*r|_RqIgsq{g%7>MKOD+= zM54b*#jaq*mv#b=nV-8>>t@^?=uTk6){WgrketqP(8&{lO$Hv{gJE83IsKqHjZ$Ig zn?9*AdCEKin}pQ@LixjCVlnL(Oo2VZhbEa?^%g z+s9Q!m7H>@V%Y3&)up%*%9dT%8E~SYAhR$ zV1m_SpzcE;fquDeo%`>Jrs_Ut#sc!=3h1N;ogn6rJ&3I0Wa6M=;^g#;wetISRwkf@ zEiea_rIy8h1?)5F@nl!mP#4@gMyR1Jvu}pR;M0hRV}i#+2H?rL%%BwhFOSUaq=f;g zpdoq?dj$1Av)j<#{(qAjPiJf9_BTGkDg}6 z+-OuXA$|B#*=BaNI0QE2(RHWw1jE@6*WrmRtM8ePSf~iRNcZY=jep3MRIFof6+Zb~ z$Qg%sX>0@x3KOz~>pWjaYWIHwQbf`kA!NhXkXzy?Q`PlCPe++dN)rJg`YcvP6c^?< zyjkp!JmOQ_eaJs5{GhL3T0#+F&1}rGAN(JzNqm)-1WOz!=tIYW<=4 zRBkS6o$Bc7H*RrK?13ZOJ6K4U-m2gm#v ze#ky3WbEZbC5G|%pn60kTHyvG@vN#C!#n!JBViZ$3A@~A(kxg1_2};Q`pF|%f4A+s zl-%$P^MWrZF8|m70t06RQM~^9e-Qo^y?=fFmwzNE%KTNqUzySW68!a94uZ&k@T7ki z{3}`XKLw{i?4$oXb@X>VzY_@m)C2|MH~mH@{9X8W-rJwTk??;A|BVIryN2HhV}EK$ zMELjR|6la6-xdDOzxq>QGU6W!e`jO;uHg6m?>`l2fcRV>hdBOwH~e?e-!}yR6!pjc z-IzbO2Y=V{*AC!M9RR>Q9RTpRe&BcU-|PCH3IrJcn8g22mH%DoU+eatDgl6ckkWt8 b>7P|xQ3eW>g8%>`=tTue Date: Thu, 22 Aug 2013 13:40:24 +0200 Subject: [PATCH 4/4] date_converters are need... --- pandas/io/date_converters.py | 75 +++++++++++++++ pandas/io/test_date_converters.py | 155 ++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 pandas/io/test_date_converters.py diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 2be477f49e28b..3aa9955a6affe 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,4 +1,6 @@ """This module is designed for community supported date conversion functions""" +from datetime import datetime, timedelta, time + from pandas.compat import range import numpy as np import pandas.lib as lib @@ -56,3 +58,76 @@ def _check_columns(cols): raise AssertionError() return N + + +## Datetime Conversion for date_parsers +## see also: create a community supported set of typical converters +## https://github.com/pydata/pandas/issues/1180 + +def offset_datetime(dt_in, days=0, hours=0, minutes=0, + seconds=0, microseconds=0): + '''appply corrective time offset using datetime.timedelta + + input + ----- + dt_in : datetime.time or datetime.datetime object + days : integer value (positive or negative) for days component of offset + hours : integer value (positive or negative) for hours component of offset + minutes : integer value (positive or negative) for + minutes component of offset + seconds : integer value (positive or negative) for + seconds component of offset + microseconds : integer value (positive or negative) for + microseconds component of offset + + output + ------ + ti_corr : datetime.time or datetime.datetime object + + + ''' + # if a excel time like '23.07.2013 24:00' they actually mean + # in Python '23.07.2013 23:59', must be converted +# offset = -10 # minutes + delta = timedelta(days=days, hours=hours, minutes=minutes, + seconds=seconds, microseconds=microseconds) + + #check if offset it to me applied on datetime or time + if type(dt_in) is time: + #create psydo datetime + dt_now = datetime.now() + dt_base = datetime.combine(dt_now, dt_in) + else: + dt_base = dt_in + + dt_corr = (dt_base) + delta + + #if input is time, we return it. + if type(dt_in) is time: + dt_corr = dt_corr.time() + + return dt_corr + + +def dt2ti(dt_in): + '''converts wrong datetime.datetime to datetime.time + + input + ----- + dt_in : dt_in : datetime.time or datetime.datetime object + + output + ------- + ti_corr : datetime.time object + ''' + # so we correct those which are not of type :mod:datetime.time + # impdt2tiortant hint: + # http://stackoverflow.com/a/12906456 + if type(dt_in) is not time: + dt_in = dt_in.time() + elif type(dt_in) is datetime: + dt_in = dt_in.time() + else: + pass + + return dt_in diff --git a/pandas/io/test_date_converters.py b/pandas/io/test_date_converters.py new file mode 100644 index 0000000000000..7bba0212b1348 --- /dev/null +++ b/pandas/io/test_date_converters.py @@ -0,0 +1,155 @@ +from pandas.compat import StringIO, BytesIO +from datetime import datetime, time, timedelta, date +import csv +import os +import sys +import re +import unittest + +import nose + +from numpy import nan +import numpy as np +from numpy.testing.decorators import slow + +from pandas import DataFrame, Series, Index, isnull +import pandas.io.parsers as parsers +from pandas.io.parsers import (read_csv, read_table, read_fwf, + TextParser) +from pandas.util.testing import (assert_almost_equal, assert_frame_equal, + assert_series_equal, network) +import pandas.lib as lib +from pandas import compat +from pandas.lib import Timestamp +import pandas.io.date_converters as conv + + +class TestConverters(unittest.TestCase): + + def setUp(self): + self.years = np.array([2007, 2008]) + self.months = np.array([1, 2]) + self.days = np.array([3, 4]) + self.hours = np.array([5, 6]) + self.minutes = np.array([7, 8]) + self.seconds = np.array([9, 0]) + self.dates = np.array(['2007/1/3', '2008/2/4'], dtype=object) + self.times = np.array(['05:07:09', '06:08:00'], dtype=object) + self.expected = np.array([datetime(2007, 1, 3, 5, 7, 9), + datetime(2008, 2, 4, 6, 8, 0)]) + + def test_parse_date_time(self): + result = conv.parse_date_time(self.dates, self.times) + self.assert_((result == self.expected).all()) + + data = """\ +date, time, a, b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""" + datecols = {'date_time': [0, 1]} + df = read_table(StringIO(data), sep=',', header=0, + parse_dates=datecols, date_parser=conv.parse_date_time) + self.assert_('date_time' in df) + self.assert_(df.date_time.ix[0] == datetime(2001, 1, 5, 10, 0, 0)) + + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = read_csv(StringIO(data), header=None, parse_dates=date_spec, + date_parser=conv.parse_date_time) + + def test_parse_date_fields(self): + result = conv.parse_date_fields(self.years, self.months, self.days) + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + self.assert_((result == expected).all()) + + data = "year, month, day, a\n 2001 , 01 , 10 , 10.\n 2001 , 02 , 1 , 11." + datecols = {'ymd': [0, 1, 2]} + df = read_table(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_date_fields) + self.assert_('ymd' in df) + self.assert_(df.ymd.ix[0] == datetime(2001, 1, 10)) + + def test_datetime_six_col(self): + result = conv.parse_all_fields(self.years, self.months, self.days, + self.hours, self.minutes, self.seconds) + self.assert_((result == self.expected).all()) + + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0, 0.0, 10. +2001, 01, 5, 10, 0, 00, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = read_table(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assert_('ymdHMS' in df) + self.assert_(df.ymdHMS.ix[0] == datetime(2001, 1, 5, 10, 0, 0)) + + def test_datetime_fractional_seconds(self): + data = """\ +year, month, day, hour, minute, second, a, b +2001, 01, 05, 10, 00, 0.123456, 0.0, 10. +2001, 01, 5, 10, 0, 0.500000, 1., 11. +""" + datecols = {'ymdHMS': [0, 1, 2, 3, 4, 5]} + df = read_table(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=conv.parse_all_fields) + self.assert_('ymdHMS' in df) + self.assert_(df.ymdHMS.ix[0] == datetime(2001, 1, 5, 10, 0, 0, + microsecond=123456)) + self.assert_(df.ymdHMS.ix[1] == datetime(2001, 1, 5, 10, 0, 0, + microsecond=500000)) + + def test_generic(self): + data = "year, month, day, a\n 2001, 01, 10, 10.\n 2001, 02, 1, 11." + datecols = {'ym': [0, 1]} + dateconverter = lambda y, m: date(year=int(y), month=int(m), day=1) + df = read_table(StringIO(data), sep=',', header=0, + parse_dates=datecols, + date_parser=dateconverter) + self.assert_('ym' in df) + self.assert_(df.ym.ix[0] == date(2001, 1, 1)) + + def test_offset_datetime(self): + #test with a datetime.datetime object + dt_in = datetime(2013, 1, 1, 1, 10, 10, 100000) + dt_target = datetime(2013, 1, 2, 6, 20, 40, 100600) + dt_res = conv.offset_datetime(dt_in, days=1, hours=5, minutes=10, + seconds=30, microseconds=600) + + assert(dt_res == dt_target) + #test with a datetime.time object + ti_in = time(1, 10, 20, 100000) + ti_target = time(6, 20, 50, 100600) + ti_res = conv.offset_datetime(ti_in, hours=5, minutes=10, + seconds=30, microseconds=600) + assert(ti_res == ti_target) + + def test_dt2ti(self): + #a datetime.datetime object + dt_in = datetime(2013, 1, 1, 1, 10, 10, 100000) + ti_target = time(1, 10, 10, 100000) + dt2ti_dt_res = conv.dt2ti(dt_in) + assert(ti_target == dt2ti_dt_res) + + #a datetime.time object + ti_in = time(1, 10, 20, 100000) + ti_target_dt2ti = time(1, 10, 20, 100000) + dt2ti_ti_res = conv.dt2ti(ti_in) + assert(ti_target_dt2ti == dt2ti_ti_res) + + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False)