From 43673368d4309f38fe3a4f6eeb1860b3b5f3fdd5 Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 28 Dec 2012 11:48:06 -0500
Subject: [PATCH 01/12] BUG: fixed old version compatibility warnings

added copy_to method to allow file upgrades to new version
---
 RELEASE.rst                      |  1 +
 doc/source/io.rst                | 22 ++++++++++++-
 doc/source/v0.10.1.txt           |  3 ++
 pandas/io/pytables.py            | 56 ++++++++++++++++++++++++--------
 pandas/io/tests/test_pytables.py | 20 ++++++++++++
 5 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index ee5bd061937b1..542ea12e95605 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -50,6 +50,7 @@ pandas 0.10.1
     to do multiple-table append/selection
   - added support for datetime64 in columns
   - added method ``unique`` to select the unique values in an indexable or data column
+  - added method ``copy_to`` to copy an existing store (and possibly upgrade)
 - Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565)
 - Support reading gzipped data from file-like object
 - ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index bf9c913909dee..c54dab2900084 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1372,11 +1372,31 @@ Notes & Caveats
 Compatibility
 ~~~~~~~~~~~~~
 
-0.10 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas,
+0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas;
 however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire
 file and write it out using the new format to take advantage of the updates. The group attribute
 ``pandas_version`` contains the version information.
 
+   .. ipython:: python
+
+      # a legacy store
+      import os
+      legacy_store = HDFStore('legacy_0.10.h5', 'r')
+      legacy_store
+
+      # copy (and return the new handle)
+      new_store = legacy_store.copy_to('store_new.h5')
+      new_store
+      new_store.close()
+
+   .. ipython:: python
+      :suppress:
+
+      legacy_store.close()
+      import os
+      os.remove('store_new.h5')
+
 Performance
 ~~~~~~~~~~~
 
diff --git a/doc/source/v0.10.1.txt b/doc/source/v0.10.1.txt
index b8137fda540cd..2eb40b2823214 100644
--- a/doc/source/v0.10.1.txt
+++ b/doc/source/v0.10.1.txt
@@ -17,6 +17,9 @@ New features
 HDFStore
 ~~~~~~~~
 
+You may need to upgrade your existing data files. Please visit the **compatibility** section in the main docs.
+
+
 ..
ipython:: python :suppress: :okexcept: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1469620ea01f2..fec80a1855578 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -37,9 +37,11 @@ # versioning attribute _version = '0.10.1' - -class IncompatibilityWarning(Warning): - pass +class IncompatibilityWarning(Warning): pass +incompatibility_doc = """ +where criteria is being ignored as this version [%s] is too old (or not-defined), +read the file in and write it out to a new file to upgrade (with the copy_to method) +""" # reading and writing the full object in one go _TYPE_MAP = { @@ -259,6 +261,15 @@ def keys(self): """ return [n._v_pathname for n in self.groups()] + def items(self): + """ + iterate on key->group + """ + for g in self.groups(): + yield g._v_pathname, g + + iteritems = items + def open(self, mode='a', warn=True): """ Open the file in the specified mode @@ -619,6 +630,17 @@ def get_table(self, key): t.infer_axes() return t + def copy_to(self, file): + """ copy the existing store to a new file, upgrading in place """ + new_store = HDFStore(file, mode = 'w') + for k, g in self.iteritems(): + data = self.select(k) + if _is_table_type(g): + new_store.append(k,data) + else: + new_store.put(k,data) + return new_store + ###### private methods ###### def _get_handler(self, op, kind): @@ -1492,6 +1514,10 @@ def __init__(self, parent, group, **kwargs): self.nan_rep = None self.selection = None + @property + def is_old_version(self): + return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 + @property def table_type_short(self): return self.table_type.split('_')[0] @@ -1503,14 +1529,18 @@ def pandas_type(self): def __repr__(self): """ return a pretty representatgion of myself """ self.infer_axes() - dc = ",dc->[%s]" % ','.join( - self.data_columns) if len(self.data_columns) else '' - return "%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type, - self.table_type_short, - self.nrows, - ','.join([a.name for a in self.index_axes]), - dc) - + dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else '' + ver = '' + if self.is_old_version: + ver = "[%s]" % '.'.join([ str(x) for x in self.version ]) + + return "%s%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type, + ver, + self.table_type_short, + self.nrows, + ','.join([ a.name for a in self.index_axes ]), + dc) + __str__ = __repr__ def copy(self): @@ -1621,8 +1651,8 @@ def validate_version(self, where=None): """ are we trying to operate on an old version? 
""" if where is not None: if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: - warnings.warn("where criteria is being ignored as we this version is too old (or not-defined) [%s]" - % '.'.join([str(x) for x in self.version]), IncompatibilityWarning) + ws = incompatibility_doc % '.'.join([ str(x) for x in self.version ]) + warnings.warn(ws, IncompatibilityWarning) @property def indexables(self): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index f2490505b89c6..b790d32706f72 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1735,6 +1735,26 @@ def test_legacy_0_10_read(self): store.select(k) store.close() + def test_legacy_copy_to(self): + pth = curpath() + try: + import os + store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r') + import tempfile + tmp = tempfile.mkstemp()[1] + tstore = store.copy_to(tmp) + + # the tmp store + for k in tstore.keys(): + self.assert_(k in store) + except: + pass + finally: + store.close() + tstore.close() + import os + os.remove(tmp) + def test_legacy_table_write(self): raise nose.SkipTest # legacy table types From 0dd7cb4f474cef20b7fa5b89276697e21196796f Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 28 Dec 2012 12:00:56 -0500 Subject: [PATCH 02/12] didn't include the test file (in _static of docs dir) --- doc/source/_static/legacy_0.10.h5 | Bin 0 -> 238321 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 doc/source/_static/legacy_0.10.h5 diff --git a/doc/source/_static/legacy_0.10.h5 b/doc/source/_static/legacy_0.10.h5 new file mode 100644 index 0000000000000000000000000000000000000000..b1439ef16361abbc0756fbf7d344fd65d8a1a473 GIT binary patch literal 238321 zcmeEP1zZ(P*WW9Mh@u#X2_kkN4d&3&T_)1fDF_yJcOWK;ofw$lV1pvkCDIMjDWKo% z-Q9bENA}+5jqmsV-q%&`Y|MXW&Y3f3&YUy5yGKi1Q&3>60ETb;{1`7L#N4HRevw=K zB_8G}>X^E|n%rTK>25N;-jCQ}V0;*UZ8Ujah`j$q;&xa$dEU%KLjyxOsryqYc^Ql4 zBlf8KQ0M=o{~H|8(oiwNDUBe>WgejY%+16JO0ISX6UXa_1RIP^XYS#0$dCS+X`AUG zLBmF%KU^*;xn*WTKiYl{?kf0?D*mH}4I|~Xw_N|-`}WSSrl(?Rips<5(I5R$^;lwJ zXpYmy&X3ocAG6RfG1WFSVCKiW4wYY8QVJd6`IVR{1NAgKztK`N6;(YAQ)^8_6MYpk z>%QZevD$y#LUMe1*g z6V4v?4(rf?t(&KvtCY3BjoVrWU-q#x>U{ViX0y)OR~n^M2K*S8cuM|-@JwhkH84~s z`Z#gS&r76z{ZhEq6Z^_RoTudYup@mwiq6p+>GMq|FXxP;&o`R9ypTga`n&x7tCYTz zBKc=Twi{a?ai>QIV}xG9^$Miidf2!-;9SS^ch}PflK5F%PgC^q zqkWX0Sy5jM*R$w9+LW2U6kn*6jq6X~?48~5jD6uqb=|Ezyfl8y%f`dr#@E^}(2H43 zW8Ej2_yqj zd!qjYnK_%1w6nEp5oPbWOrnLGuh0v`NAQ&GC~*V$Egdi}8U9XxFnf8T+pnh%%h zOhc)x(`be1DBaq2dpn*c?NLd=(~0BK#b=<@^R|l@o~~FueZ@?a-l~tfiKjod8n@%A z_Q=vDvru~KO3+C>-LX_@>ui*!R>mjdsl)~wu{kL95o~eC(^u#BM&apQ6Rqo#DD5iB zX~9#ywcZ+YQF?}t?>L@ThMX>(D|qOb%@(b_89*zvzoFW&c(_Fto=%wF2vY}K)qPd> z5{x#MD2lgjfb+g@D_$i&L^%unH}$4 z0n(&$ecJCA0zRXbyOM1U@bpu^x8+zp*r&>$n}7NPc%{!X(rRfbs2On6r8A%w)@c+L z#qX{M(=Ke-F>hfB=!>sw^t|SoSgHmo>KLr?@X=J)mDMuRRB~6a@K$kJE90VPr6Rk` z&);&bgR8cgbCBjb8!aCeoHRnm`+GsZJPf21jSPIAz?(=zW?L^9+{byx@;89<8 zn2L#s%2HHkHa*K9>5+>@#w(Vlh8law9r=;PpxpTOHQ0sT>Rva|E_3baPE6#?{P=jq|= z=d;$%&(lZ1OIl#M9bsKd`Vm&Nq^F~!uY;ejfa+RbXAcKoUv-;60WTQ=J8Ns(wa#vS z&K}m*0-m-m4t9P5Ua|rPekNXW0@LliOnxC{G+YR3c zIVJWnthdf&_hW8#~{Ef>1$=8fynWd>eT=0{`{w({XQ6(>p% zEy8VFF>xx9pBT-k*8G$HuW+FE98!xUJa$q~4*aX*ic~yo_e7$c>tuQC@s?XVTZ(yu zKk+bvkN?T>uGq1^ThYCgl={E@wqjF6+=zrEvOyXAZ5=y^=^N(`GW)AM}q zqlx5+$-{eJ>3K{t4;wwB=lR}8{N#z4DZQ`sJZ6n|r2nMn)qmd+TfhBvUfO;Bf&5?p zmHti2kI;9&;xs=X`~;L=hw2}c51ZZJrq3i=F@|zEFb{k8H+Fk)=g+13`SxMN-NT6F zZ}(3UhVhpPwtbpW16C*bCo7nB!4+01#Y;EmgUaof`JZh02(?#4YDJhQLiahvS`UW( zgsE>#RD-}>IKEz1cF3nw9lX_WEj<>Auxq 
From ce6ead6a63ed05ea8d1532d4a23cf0595d1b440b Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 28 Dec 2012 19:33:54 -0500
Subject: [PATCH 03/12] DOC: minor doc updates to conform with new datetime64 behavior

---
 doc/source/io.rst | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index c54dab2900084..0a2c225df375d 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1106,7 +1106,7 @@ Storing Mixed Types in a Table
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Storing mixed-dtype data is supported. Strings are stored as a fixed-width using the maximum size of the appended column.
 Subsequent appends will truncate strings at this length.
-Passing ``min_itemsize = { `values` : size }`` as a parameter to append will set a larger minimum for the string columns. Storing ``floats, strings, ints, bools, datetime64`` are currently supported. For string columns, passing ``nan_rep = 'my_nan_rep'`` to append will change the default nan representation on disk (which converts to/from `np.nan`), this defaults to `nan`.
+Passing ``min_itemsize = { `values` : size }`` as a parameter to append will set a larger minimum for the string columns. ``floats, strings, ints, bools, datetime64`` are currently supported. For string columns, passing ``nan_rep = 'nan'`` to append will change the default nan representation on disk (which converts to/from `np.nan`); this defaults to `nan`.
 
 .. ipython:: python
 
@@ -1115,9 +1115,6 @@
     df_mixed['int'] = 1
     df_mixed['bool'] = True
     df_mixed['datetime64'] = Timestamp('20010102')
-
-    # make sure that we have datetime64[ns] types
-    df_mixed = df_mixed.convert_objects()
     df_mixed.ix[3:5,['A','B','string','datetime64']] = np.nan
 
     store.append('df_mixed', df_mixed, min_itemsize = { 'values' : 50 })
@@ -1128,8 +1125,6 @@
     # we have provided a minimum string column size
     store.root.df_mixed.table
 
-It is ok to store ``np.nan`` in a ``float or string``. Make sure to do a ``convert_objects()`` on the frame before storing a ``np.nan`` in a datetime64 column. Storing a column with a ``np.nan`` in a ``int, bool`` will currently throw an ``Exception`` as these columns will have converted to ``object`` type.
-
 Storing Multi-Index DataFrames
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
From bda1cbe1f019c9bbbd25c3288ad47678ed231c96 Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 28 Dec 2012 21:05:40 -0500
Subject: [PATCH 04/12] DOC: release notes formatting fixup

---
 RELEASE.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 542ea12e95605..9ce4f170f36af 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -34,9 +34,9 @@ pandas 0.10.1
 **Improvements to existing features**
 
 - ``HDFStore``
+
   - enables storing of multi-index dataframes (closes GH1277_)
-  - support data column indexing and selection, via ``data_columns`` keyword
-    in append
+  - support data column indexing and selection, via ``data_columns`` keyword in append
   - support write chunking to reduce memory footprint, via ``chunksize``
     keyword to append
   - support automagic indexing via ``index`` keyword to append
@@ -58,6 +58,7 @@ pandas 0.10.1
 **Bug fixes**
 
 - ``HDFStore``
+
   - correctly handle ``nan`` elements in string columns; serialize via the
     ``nan_rep`` keyword to append
   - raise correctly on non-implemented column types (unicode/date)
@@ -77,6 +78,7 @@ pandas 0.10.1
 **API Changes**
 
 - ``HDFStore``
+
   - removed keyword ``compression`` from ``put`` (replaced by keyword
     ``complib`` to be consistent across library)
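Before the next patch reworks the copy machinery, the mixed-dtype rules documented in PATCH 03 can be exercised with a short sketch; the store name is hypothetical and the values mirror the io.rst example::

    import numpy as np
    from pandas import DataFrame, HDFStore, Timestamp

    df_mixed = DataFrame(np.random.randn(8, 2), columns=['A', 'B'])
    df_mixed['string'] = 'string'
    df_mixed['int'] = 1
    df_mixed['bool'] = True
    df_mixed['datetime64'] = Timestamp('20010102')

    store = HDFStore('mixed.h5', mode='w')

    # strings are stored fixed-width; min_itemsize reserves room for longer
    # values in later appends, and np.nan in string columns round-trips
    # through the nan_rep marker on disk
    store.append('df_mixed', df_mixed, min_itemsize={'values': 50})
    store.select('df_mixed')
    store.close()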
From 395203f8b8bd4695c5ab119d23f046959d26ab5f Mon Sep 17 00:00:00 2001
From: jreback
Date: Sat, 29 Dec 2012 09:34:26 -0500
Subject: [PATCH 05/12] BUG: updated the copy method to work with supplied options

---
 RELEASE.rst                      |  2 +-
 doc/source/io.rst                |  8 ++--
 pandas/io/pytables.py            | 63 ++++++++++++++++++++++++-----
 pandas/io/tests/test_pytables.py | 64 ++++++++++++++++++++++++--------
 4 files changed, 107 insertions(+), 30 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 9ce4f170f36af..f2753e0b9a1c6 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -50,7 +50,7 @@ pandas 0.10.1
     to do multiple-table append/selection
   - added support for datetime64 in columns
   - added method ``unique`` to select the unique values in an indexable or data column
-  - added method ``copy_to`` to copy an existing store (and possibly upgrade)
+  - added method ``copy`` to copy an existing store (and possibly upgrade)
 - Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565)
 - Support reading gzipped data from file-like object
 - ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 0a2c225df375d..1b2a46225bad1 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1343,7 +1343,7 @@ Or on-the-fly compression (this only applies to tables). You can turn off file c
 
 - ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5``
 
-Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow you to reuse previously deleted space (alternatively, one can simply remove the file and write again).
+Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow you to reuse previously deleted space. Alternatively, one can simply remove the file and write again, or use the ``copy`` method.
 
 Notes & Caveats
 ~~~~~~~~~~~~~~~
@@ -1367,9 +1367,7 @@ Notes & Caveats
 Compatibility
 ~~~~~~~~~~~~~
 
-0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas
-however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire
-file and write it out using the new format to take advantage of the updates. The group attribute ``pandas_version`` contains the version information.
+0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas; however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire file and write it out using the new format, using the method ``copy`` to take advantage of the updates. The group attribute ``pandas_version`` contains the version information. ``copy`` takes a number of options, please see the docstring.
 
 .. ipython:: python
 
@@ -1380,7 +1378,7 @@ file and write it out using the new format to take advantage of the updates. The
 legacy_store
 
     # copy (and return the new handle)
-    new_store = legacy_store.copy_to('store_new.h5')
+    new_store = legacy_store.copy('store_new.h5')
     new_store
     new_store.close()
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index fec80a1855578..60c5246b77af5 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -630,15 +630,45 @@ def get_table(self, key):
         t.infer_axes()
         return t
 
-    def copy_to(self, file):
-        """ copy the existing store to a new file, upgrading in place """
-        new_store = HDFStore(file, mode = 'w')
-        for k, g in self.iteritems():
-            data = self.select(k)
-            if _is_table_type(g):
-                new_store.append(k,data)
-            else:
-                new_store.put(k,data)
+    def is_table(self, key):
+        """ return a boolean if I am a table """
+        group = self.get_node(key)
+        if group is None:
+            raise KeyError('No object named %s in the file' % key)
+        return _is_table_type(group)
+
+    def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False):
+        """ copy the existing store to a new file, upgrading in place
+
+            Parameters
+            ----------
+            propindexes: restore indexes in copied file (defaults to True)
+            keys       : list of keys to include in the copy (defaults to all)
+            mode, complib, complevel, fletcher32 same as in HDFStore.__init__
+
+            Returns
+            -------
+            open file handle of the new store
+
+        """
+        new_store = HDFStore(file, mode = mode, complib = complib, complevel = complevel, fletcher32 = fletcher32)
+        if keys is None:
+            keys = self.keys()
+        if not isinstance(keys, (tuple,list)):
+            keys = [ keys ]
+        for k in keys:
+            n = self.get_node(k)
+            if n is not None:
+                data = self.select(k)
+                if _is_table_type(n):
+
+                    t = self.get_table(k)
+                    index = False
+                    if propindexes:
+                        index = [ a.name for a in t.axes if a.is_indexed ]
+                    new_store.append(k,data, index=index, data_columns=getattr(t,'data_columns',None))
+                else:
+                    new_store.put(k,data)
         return new_store
 
 ###### private methods ######
@@ -1131,6 +1161,14 @@ def __eq__(self, other):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    @property
+    def is_indexed(self):
+        """ return whether I am an indexed column """
+        try:
+            return getattr(self.table.cols,self.cname).is_indexed
+        except:
+            return False
+
     def copy(self):
         new_self = copy.copy(self)
         return new_self
@@ -1543,6 +1581,13 @@ def __repr__(self):
 
     __str__ = __repr__
 
+    def __getitem__(self, c):
+        """ return the axis for c """
+        for a in self.axes:
+            if c == a.name:
+                return a
+        return None
+
     def copy(self):
         new_self = copy.copy(self)
         return new_self

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index b790d32706f72..febb571cf45b6 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1735,25 +1735,59 @@ def test_legacy_0_10_read(self):
             store.select(k)
         store.close()
 
-    def test_legacy_copy_to(self):
+    def test_copy(self):
         pth = curpath()
+        def do_copy(f = None, keys = None, propindexes = True, **kwargs):
+            try:
+                import os
+
+                if f is None:
+                    f = os.path.join(pth, 'legacy_0.10.h5')
+
+                store = HDFStore(f, 'r')
+                import tempfile
+                tmp = tempfile.mkstemp()[1]
+                tstore = store.copy(tmp, keys = keys, propindexes = propindexes, **kwargs)
+
+                # check keys
+                if keys is None:
+                    keys = store.keys()
+                self.assert_(set(keys) == set(tstore.keys()))
+
+                # check indices & nrows
+                for k in tstore.keys():
+                    if tstore.is_table(k):
+                        new_t = tstore.get_table(k)
+                        orig_t = store.get_table(k)
+
+                        self.assert_(orig_t.nrows == new_t.nrows)
+                        for a in orig_t.axes:
+                            if a.is_indexed:
self.assert_(new_t[a.name].is_indexed == True) + + except: + pass + finally: + store.close() + tstore.close() + import os + os.remove(tmp) + + do_copy() + do_copy(keys = ['df']) + do_copy(propindexes = False) + + # new table + df = tm.makeDataFrame() try: - import os - store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r') - import tempfile - tmp = tempfile.mkstemp()[1] - tstore = store.copy_to(tmp) - - # the tmp store - for k in tstore.keys(): - self.assert_(k in store) - except: - pass + st = HDFStore(self.scratchpath) + st.append('df', df, data_columns = ['A']) + st.close() + do_copy(f = self.scratchpath) + do_copy(f = self.scratchpath, propindexes = False) finally: - store.close() - tstore.close() import os - os.remove(tmp) + os.remove(self.scratchpath) def test_legacy_table_write(self): raise nose.SkipTest From 4a1510db42aa431d11504f30d8f867825d2cc28e Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 29 Dec 2012 09:47:38 -0500 Subject: [PATCH 06/12] ENH: added overwrite parameter to copy (defaults to True) --- pandas/io/pytables.py | 11 +++++++++-- pandas/io/tests/test_pytables.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 60c5246b77af5..74bb2dcfb48de 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -637,13 +637,14 @@ def is_table(self, key): raise KeyError('No object named %s in the file' % key) return _is_table_type(group) - def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False): + def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False, overwrite = True): """ copy the existing store to a new file, upgrading in place Parameters ---------- propindexes: restore indexes in copied file (defaults to True) keys : list of keys to include in the copy (defaults to all) + overwrite : overwrite (remove and replace) existing nodes in the new store (default is True) mode, complib, complevel, fletcher32 same as in HDFStore.__init__ Returns @@ -659,6 +660,11 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None for k in keys: n = self.get_node(k) if n is not None: + + if k in new_store: + if overwrite: + new_store.remove(k) + data = self.select(k) if _is_table_type(n): @@ -666,9 +672,10 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None index = False if propindexes: index = [ a.name for a in t.axes if a.is_indexed ] - new_store.append(k,data, index=index, data_columns=getattr(t,'data_columns',None)) + new_store.append(k,data, index=index, data_columns=getattr(t,'data_columns',None)) else: new_store.put(k,data) + return new_store ###### private methods ###### diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index febb571cf45b6..4ce89057a1a46 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1737,7 +1737,7 @@ def test_legacy_0_10_read(self): def test_copy(self): pth = curpath() - def do_copy(f = None, keys = None, propindexes = True, **kwargs): + def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): try: import os @@ -1745,9 +1745,12 @@ def do_copy(f = None, keys = None, propindexes = True, **kwargs): f = os.path.join(pth, 'legacy_0.10.h5') store = HDFStore(f, 'r') - import tempfile - tmp = tempfile.mkstemp()[1] - tstore = store.copy(tmp, keys = keys, propindexes = propindexes, **kwargs) + + if new_f is 
None: + import tempfile + new_f = tempfile.mkstemp()[1] + + tstore = store.copy(new_f, keys = keys, propindexes = propindexes, **kwargs) # check keys if keys is None: @@ -1771,7 +1774,7 @@ def do_copy(f = None, keys = None, propindexes = True, **kwargs): store.close() tstore.close() import os - os.remove(tmp) + os.remove(new_f) do_copy() do_copy(keys = ['df']) From 375248ea6848dd1a5a7f27c136361826206d3506 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 4 Jan 2013 16:42:42 -0500 Subject: [PATCH 07/12] ENH/BUG: refactor of non-table HDFStore to put in object form (Storer class), for future enhancements added shape to non-table display closes GH2637 --- RELEASE.rst | 6 +- doc/source/io.rst | 4 +- pandas/io/pytables.py | 1539 ++++++++++++++++-------------- pandas/io/tests/test_pytables.py | 51 +- 4 files changed, 859 insertions(+), 741 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index f2753e0b9a1c6..d45576933f941 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -51,6 +51,7 @@ pandas 0.10.1 - added support for datetime64 in columns - added method ``unique`` to select the unique values in an indexable or data column - added method ``copy`` to copy an existing store (and possibly upgrade) + - show the shape of the data on disk for non-table stores when printing the store - Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565) - Support reading gzipped data from file-like object - ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_) @@ -64,6 +65,7 @@ pandas 0.10.1 - raise correctly on non-implemented column types (unicode/date) - handle correctly ``Term`` passed types (e.g. ``index<1000``, when index is ``Int64``), (closes GH512_) + - handle Timestamp correctly in data_columns (closes GH2637_) - Fix DataFrame.info bug with UTF8-encoded columns. (GH2576_) - Fix DatetimeIndex handling of FixedOffset tz (GH2604_) - More robust detection of being in IPython session for wide DataFrame @@ -79,18 +81,20 @@ pandas 0.10.1 - ``HDFStore`` + - refactored HFDStore to deal with non-table stores as objects, will allow future enhancements - removed keyword ``compression`` from ``put`` (replaced by keyword ``complib`` to be consistent across library) .. _GH512: https://github.com/pydata/pandas/issues/512 .. _GH1277: https://github.com/pydata/pandas/issues/1277 .. _GH2327: https://github.com/pydata/pandas/issues/2327 -.. _GH2576: https://github.com/pydata/pandas/issues/2576 .. _GH2585: https://github.com/pydata/pandas/issues/2585 .. _GH2604: https://github.com/pydata/pandas/issues/2604 +.. _GH2576: https://github.com/pydata/pandas/issues/2576 .. _GH2616: https://github.com/pydata/pandas/issues/2616 .. _GH2625: https://github.com/pydata/pandas/issues/2625 .. _GH2643: https://github.com/pydata/pandas/issues/2643 +.. _GH2637: https://github.com/pydata/pandas/issues/2637 pandas 0.10.0 ============= diff --git a/doc/source/io.rst b/doc/source/io.rst index 1b2a46225bad1..1cbdfaadbcb33 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1263,11 +1263,11 @@ To retrieve the *unique* values of an indexable or data column, use the method ` **Table Object** -If you want to inspect the table object, retrieve via ``get_table``. You could use this progamatically to say get the number of rows in the table. +If you want to inspect the stored object, retrieve via ``get_storer``. You could use this progamatically to say get the number of rows in an object. .. 
ipython:: python - store.get_table('df_dc').nrows + store.get_storer('df_dc').nrows Multiple Table Queries ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 74bb2dcfb48de..73db793069817 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -43,43 +43,42 @@ class IncompatibilityWarning(Warning): pass read the file in and write it out to a new file to upgrade (with the copy_to method) """ -# reading and writing the full object in one go +# map object types _TYPE_MAP = { - Series: 'series', - SparseSeries: 'sparse_series', - TimeSeries: 'series', - DataFrame: 'frame', - SparseDataFrame: 'sparse_frame', - Panel: 'wide', - Panel4D: 'ndim', - SparsePanel: 'sparse_panel' + + Series : 'series', + SparseSeries : 'sparse_series', + TimeSeries : 'series', + DataFrame : 'frame', + SparseDataFrame : 'sparse_frame', + Panel : 'wide', + Panel4D : 'ndim', + SparsePanel : 'sparse_panel' } -_NAME_MAP = { - 'series': 'Series', - 'time_series': 'TimeSeries', - 'sparse_series': 'SparseSeries', - 'frame': 'DataFrame', - 'sparse_frame': 'SparseDataFrame', - 'frame_table': 'DataFrame (Table)', - 'wide': 'Panel', - 'sparse_panel': 'SparsePanel', - 'wide_table': 'Panel (Table)', - 'long': 'LongPanel', - # legacy h5 files - 'Series': 'Series', - 'TimeSeries': 'TimeSeries', - 'DataFrame': 'DataFrame', - 'DataMatrix': 'DataMatrix' +# storer class map +_STORER_MAP = { + 'TimeSeries' : 'LegacySeriesStorer', + 'Series' : 'LegacySeriesStorer', + 'DataFrame' : 'LegacyFrameStorer', + 'DataMatrix' : 'LegacyFrameStorer', + 'series' : 'SeriesStorer', + 'sparse_series' : 'SparseSeriesStorer', + 'frame' : 'FrameStorer', + 'sparse_frame' : 'SparseFrameStorer', + 'wide' : 'PanelStorer', + 'sparse_panel' : 'SparsePanelStorer', } -# legacy handlers -_LEGACY_MAP = { - 'Series': 'legacy_series', - 'TimeSeries': 'legacy_series', - 'DataFrame': 'legacy_frame', - 'DataMatrix': 'legacy_frame', - 'WidePanel': 'wide_table', +# table class map +_TABLE_MAP = { + 'appendable_frame' : 'AppendableFrameTable', + 'appendable_multiframe' : 'AppendableMultiFrameTable', + 'appendable_panel' : 'AppendablePanelTable', + 'appendable_ndim' : 'AppendableNDimTable', + 'worm' : 'WORMTable', + 'legacy_frame' : 'LegacyFrameTable', + 'legacy_panel' : 'LegacyPanelTable', } # axes map @@ -227,28 +226,17 @@ def __len__(self): def __repr__(self): output = '%s\nFile path: %s\n' % (type(self), self.path) - groups = self.groups() - if len(groups) > 0: - keys = [] + if len(self.keys()): + keys = [] values = [] - for n in sorted(groups, key=lambda x: x._v_name): - kind = getattr(n._v_attrs, 'pandas_type', None) - - keys.append(str(n._v_pathname)) - - # a table - if _is_table_type(n): - values.append(str(create_table(self, n))) - # a group - elif kind is None: - values.append('unknown type') + for k in self.keys(): + s = self.get_storer(k) + if s is not None: + keys.append(str(s.pathname)) + values.append(str(s)) - # another type of pandas object - else: - values.append(_NAME_MAP[kind]) - - output += adjoin(5, keys, values) + output += adjoin(12, keys, values) else: output += 'Empty' @@ -370,7 +358,7 @@ def select_as_coordinates(self, key, where=None, **kwargs): ------------------- where : list of Term (or convertable) objects, optional """ - return self.get_table(key).read_coordinates(where=where, **kwargs) + return self.get_storer(key).read_coordinates(where = where, **kwargs) def unique(self, key, column, **kwargs): """ @@ -387,7 +375,7 @@ def unique(self, key, column, **kwargs): raises ValueError if the column can 
not be extracted indivually (it is part of a data block) """ - return self.get_table(key).read_column(column=column, **kwargs) + return self.get_storer(key).read_column(column = column, **kwargs) def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kwargs): """ Retrieve pandas objects from multiple tables @@ -419,13 +407,15 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw selector = keys[0] # collect the tables - tbls = [self.get_table(k) for k in keys] + tbls = [ self.get_storer(k) for k in keys ] # validate rows nrows = tbls[0].nrows for t in tbls: if t.nrows != nrows: raise Exception("all tables must have exactly the same nrows!") + if not t.is_table: + raise Exception("object [%s] is not a table, and cannot be used in all select as multiple" % t.pathname) # select coordinates from the selector table c = self.select_as_coordinates(selector, where) @@ -439,7 +429,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, **kw # concat and return return concat(objs, axis=axis, verify_integrity=True) - def put(self, key, value, table=False, append=False, **kwargs): + def put(self, key, value, table=None, append=False, **kwargs): """ Store object in HDFStore @@ -477,22 +467,31 @@ def remove(self, key, where=None, start=None, stop=None): number of rows removed (or None if not a Table) """ - group = self.get_node(key) - if group is not None: + try: + s = self.get_storer(key) + except: - # remove the node - if where is None: - group = self.get_node(key) - group._f_remove(recursive=True) + if where is not None: + raise Exception("trying to remove a node with a non-None where clause!") - # delete from the table - else: - if not _is_table_type(group): - raise Exception('can only remove with where on objects written as tables') - t = create_table(self, group) - return t.delete(where=where, start=start, stop=stop) + # we are actually trying to remove a node (with children) + s = self.get_node(key) + if s is not None: + s._f_remove(recursive=True) + return None - return None + if s is None: + return None + + # remove the node + if where is None: + s.group._f_remove(recursive=True) + + # delete from the table + else: + if not s.is_table: + raise Exception('can only remove with where on objects written as tables') + return s.delete(where = where, start=start, stop=stop) def append(self, key, value, columns=None, **kwargs): """ @@ -598,17 +597,16 @@ def create_table_index(self, key, **kwargs): if not _table_supports_index: raise Exception("PyTables >= 2.3 is required for table indexing") - group = self.get_node(key) - if group is None: - return + s = self.get_storer(key) + if s is None: return - if not _is_table_type(group): + if not s.is_table: raise Exception("cannot create table index on a non-table") - create_table(self, group).create_index(**kwargs) + s.create_index(**kwargs) def groups(self): - """ return a list of all the groups (that are not themselves a pandas storage object) """ - return [g for g in self.handle.walkGroups() if getattr(g._v_attrs, 'pandas_type', None)] + """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ + return [ g for g in self.handle.walkGroups() if getattr(g._v_attrs,'pandas_type',None) ] def get_node(self, key): """ return the node with the key or None if it does not exist """ @@ -619,23 +617,14 @@ def get_node(self, key): except: return None - def get_table(self, key): - """ return the table object for a key, raise if not in the file or a 
non-table """ + def get_storer(self, key): + """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) - if group is None: - raise KeyError('No object named %s in the file' % key) - if not _is_table_type(group): - raise Exception("cannot return a table object for a non-table") - t = create_table(self, group) - t.infer_axes() - return t - - def is_table(self, key): - """ return a boolean if I am a table """ - group = self.get_node(key) - if group is None: - raise KeyError('No object named %s in the file' % key) - return _is_table_type(group) + if group is None: + return None + s = self._create_storer(group) + s.infer_axes() + return s def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False, overwrite = True): """ copy the existing store to a new file, upgrading in place @@ -658,21 +647,20 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None if not isinstance(keys, (tuple,list)): keys = [ keys ] for k in keys: - n = self.get_node(k) - if n is not None: + s = self.get_storer(k) + if s is not None: if k in new_store: if overwrite: new_store.remove(k) data = self.select(k) - if _is_table_type(n): + if s.is_table: - t = self.get_table(k) index = False if propindexes: - index = [ a.name for a in t.axes if a.is_indexed ] - new_store.append(k,data, index=index, data_columns=getattr(t,'data_columns',None)) + index = [ a.name for a in s.axes if a.is_indexed ] + new_store.append(k,data, index=index, data_columns=getattr(s,'data_columns',None)) else: new_store.put(k,data) @@ -680,12 +668,69 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None ###### private methods ###### - def _get_handler(self, op, kind): - return getattr(self, '_%s_%s' % (op, kind)) + def _create_storer(self, group, value = None, table = False, append = False, **kwargs): + """ return a suitable Storer class to operate """ + + pt = getattr(group._v_attrs,'pandas_type',None) + tt = getattr(group._v_attrs,'table_type',None) + + # infer the typ from the passed value + if pt is None: + if value is None: + raise Exception("cannot create a storer if the object is not existing nor a value are passed") + + pt = _TYPE_MAP[type(value)] + + # we are actually a table + if table or append: + pt += '_table' + + # a storer node + if 'table' not in pt: + try: + return globals()[_STORER_MAP[pt]](self, group, **kwargs) + except: + raise Exception("cannot properly create the storer for: [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % + (group,value,table,append,kwargs)) + + # existing node (and must be a table) + if tt is None: + + # if we are a writer, determin the tt + if value is not None: + + if pt == 'frame_table': + tt = 'appendable_frame' if value.index.nlevels == 1 else 'appendable_multiframe' + elif pt == 'wide_table': + tt = 'appendable_panel' + elif pt == 'ndim_table': + tt = 'appendable_ndim' + + else: + + # distiguish between a frame/table + tt = 'legacy_panel' + try: + fields = group.table._v_attrs.fields + if len(fields) == 1 and fields[0] == 'value': + tt = 'legacy_frame' + except: + pass - def _write_to_group(self, key, value, table=False, append=False, - complib=None, **kwargs): + try: + return globals()[_TABLE_MAP[tt or 'appendable_panel']](self, group, **kwargs) + except: + raise Exception("cannot properly create the storer for: [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % + (group,value,table,append,kwargs)) + + def _write_to_group(self, key, value, 
index=True, table=False, append=False, complib=None, **kwargs): group = self.get_node(key) + + # remove the node if we are not appending + if group is not None and not append: + self.handle.removeNode(group, recursive=True) + group = None + if group is None: paths = key.split('/') @@ -703,396 +748,28 @@ def _write_to_group(self, key, value, table=False, append=False, group = self.handle.createGroup(path, p) path = new_path - kind = _TYPE_MAP[type(value)] - if table or (append and _is_table_type(group)): - kind = '%s_table' % kind - handler = self._get_handler(op='write', kind=kind) - wrapper = lambda value: handler(group, value, append=append, - complib=complib, **kwargs) - else: - if append: + s = self._create_storer(group, value, table=table, append=append, **kwargs) + if append: + # raise if we are trying to append to a non-table, + # or a table that exists (and we are putting) + if not s.is_table or (s.is_table and table is None and s.is_exists): raise ValueError('Can only append to Tables') - if complib: - raise ValueError('Compression only supported on Tables') - - handler = self._get_handler(op='write', kind=kind) - wrapper = lambda value: handler(group, value) - - group._v_attrs.pandas_type = kind - group._v_attrs.pandas_version = _version - wrapper(value) - - def _write_series(self, group, series): - self._write_index(group, 'index', series.index) - self._write_array(group, 'values', series.values) - group._v_attrs.name = series.name - - def _write_sparse_series(self, group, series): - self._write_index(group, 'index', series.index) - self._write_index(group, 'sp_index', series.sp_index) - self._write_array(group, 'sp_values', series.sp_values) - group._v_attrs.name = series.name - group._v_attrs.fill_value = series.fill_value - group._v_attrs.kind = series.kind - - def _read_sparse_series(self, group, where=None): - index = self._read_index(group, 'index') - sp_values = _read_array(group, 'sp_values') - sp_index = self._read_index(group, 'sp_index') - name = getattr(group._v_attrs, 'name', None) - fill_value = getattr(group._v_attrs, 'fill_value', None) - kind = getattr(group._v_attrs, 'kind', 'block') - return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=kind, fill_value=fill_value, - name=name) - - def _write_sparse_frame(self, group, sdf): - for name, ss in sdf.iteritems(): - key = 'sparse_series_%s' % name - if key not in group._v_children: - node = self.handle.createGroup(group, key) - else: - node = getattr(group, key) - self._write_sparse_series(node, ss) - setattr(group._v_attrs, 'default_fill_value', - sdf.default_fill_value) - setattr(group._v_attrs, 'default_kind', - sdf.default_kind) - self._write_index(group, 'columns', sdf.columns) - - def _read_sparse_frame(self, group, where=None): - columns = self._read_index(group, 'columns') - sdict = {} - for c in columns: - key = 'sparse_series_%s' % c - node = getattr(group, key) - sdict[c] = self._read_sparse_series(node) - default_kind = getattr(group._v_attrs, 'default_kind') - default_fill_value = getattr(group._v_attrs, 'default_fill_value') - return SparseDataFrame(sdict, columns=columns, - default_kind=default_kind, - default_fill_value=default_fill_value) - - def _write_sparse_panel(self, group, swide): - setattr(group._v_attrs, 'default_fill_value', swide.default_fill_value) - setattr(group._v_attrs, 'default_kind', swide.default_kind) - self._write_index(group, 'items', swide.items) - - for name, sdf in swide.iteritems(): - key = 'sparse_frame_%s' % name - if key not in group._v_children: - 
node = self.handle.createGroup(group, key) - else: - node = getattr(group, key) - self._write_sparse_frame(node, sdf) - - def _read_sparse_panel(self, group, where=None): - default_fill_value = getattr(group._v_attrs, 'default_fill_value') - default_kind = getattr(group._v_attrs, 'default_kind') - items = self._read_index(group, 'items') - - sdict = {} - for name in items: - key = 'sparse_frame_%s' % name - node = getattr(group, key) - sdict[name] = self._read_sparse_frame(node) - return SparsePanel(sdict, items=items, default_kind=default_kind, - default_fill_value=default_fill_value) - - def _write_frame(self, group, df): - self._write_block_manager(group, df._data) - - def _read_frame(self, group, where=None, **kwargs): - return DataFrame(self._read_block_manager(group)) - - def _write_block_manager(self, group, data): - if not data.is_consolidated(): - data = data.consolidate() - - group._v_attrs.ndim = data.ndim - for i, ax in enumerate(data.axes): - self._write_index(group, 'axis%d' % i, ax) - - # Supporting mixed-type DataFrame objects...nontrivial - nblocks = len(data.blocks) - group._v_attrs.nblocks = nblocks - for i in range(nblocks): - blk = data.blocks[i] - # I have no idea why, but writing values before items fixed #2299 - self._write_array(group, 'block%d_values' % i, blk.values) - self._write_index(group, 'block%d_items' % i, blk.items) - - def _read_block_manager(self, group): - ndim = group._v_attrs.ndim - - axes = [] - for i in xrange(ndim): - ax = self._read_index(group, 'axis%d' % i) - axes.append(ax) - - items = axes[0] - blocks = [] - for i in range(group._v_attrs.nblocks): - blk_items = self._read_index(group, 'block%d_items' % i) - values = _read_array(group, 'block%d_values' % i) - blk = make_block(values, blk_items, items) - blocks.append(blk) - - return BlockManager(blocks, axes) - - def _write_wide(self, group, panel): - panel._consolidate_inplace() - self._write_block_manager(group, panel._data) - - def _read_wide(self, group, where=None, **kwargs): - return Panel(self._read_block_manager(group)) - - def _write_ndim_table(self, group, obj, append=False, axes=None, index=True, **kwargs): - if axes is None: - axes = _AXES_MAP[type(obj)] - t = create_table(self, group, typ='appendable_ndim') - t.write(axes=axes, obj=obj, append=append, **kwargs) - if index: - t.create_index(columns=index) - - def _read_ndim_table(self, group, where=None, **kwargs): - t = create_table(self, group, **kwargs) - return t.read(where, **kwargs) - - def _write_frame_table(self, group, df, append=False, axes=None, index=True, **kwargs): - if axes is None: - axes = _AXES_MAP[type(df)] - - t = create_table(self, group, typ='appendable_frame' if df.index.nlevels == 1 else 'appendable_multiframe') - t.write(axes=axes, obj=df, append=append, **kwargs) - if index: - t.create_index(columns=index) - - _read_frame_table = _read_ndim_table - - def _write_wide_table(self, group, panel, append=False, axes=None, index=True, **kwargs): - if axes is None: - axes = _AXES_MAP[type(panel)] - t = create_table(self, group, typ='appendable_panel') - t.write(axes=axes, obj=panel, append=append, **kwargs) - if index: - t.create_index(columns=index) - - _read_wide_table = _read_ndim_table - - def _write_index(self, group, key, index): - if isinstance(index, MultiIndex): - setattr(group._v_attrs, '%s_variety' % key, 'multi') - self._write_multi_index(group, key, index) - elif isinstance(index, BlockIndex): - setattr(group._v_attrs, '%s_variety' % key, 'block') - self._write_block_index(group, key, index) - elif 
isinstance(index, IntIndex): - setattr(group._v_attrs, '%s_variety' % key, 'sparseint') - self._write_sparse_intindex(group, key, index) - else: - setattr(group._v_attrs, '%s_variety' % key, 'regular') - converted = _convert_index(index).set_name('index') - self._write_array(group, key, converted.values) - node = getattr(group, key) - node._v_attrs.kind = converted.kind - node._v_attrs.name = index.name - - if isinstance(index, (DatetimeIndex, PeriodIndex)): - node._v_attrs.index_class = _class_to_alias(type(index)) - - if hasattr(index, 'freq'): - node._v_attrs.freq = index.freq - - if hasattr(index, 'tz') and index.tz is not None: - zone = tslib.get_timezone(index.tz) - if zone is None: - zone = tslib.tot_seconds(index.tz.utcoffset()) - node._v_attrs.tz = zone - - def _read_index(self, group, key): - variety = getattr(group._v_attrs, '%s_variety' % key) - - if variety == 'multi': - return self._read_multi_index(group, key) - elif variety == 'block': - return self._read_block_index(group, key) - elif variety == 'sparseint': - return self._read_sparse_intindex(group, key) - elif variety == 'regular': - _, index = self._read_index_node(getattr(group, key)) - return index - else: # pragma: no cover - raise Exception('unrecognized index variety: %s' % variety) - - def _write_block_index(self, group, key, index): - self._write_array(group, '%s_blocs' % key, index.blocs) - self._write_array(group, '%s_blengths' % key, index.blengths) - setattr(group._v_attrs, '%s_length' % key, index.length) - - def _read_block_index(self, group, key): - length = getattr(group._v_attrs, '%s_length' % key) - blocs = _read_array(group, '%s_blocs' % key) - blengths = _read_array(group, '%s_blengths' % key) - return BlockIndex(length, blocs, blengths) - - def _write_sparse_intindex(self, group, key, index): - self._write_array(group, '%s_indices' % key, index.indices) - setattr(group._v_attrs, '%s_length' % key, index.length) - - def _read_sparse_intindex(self, group, key): - length = getattr(group._v_attrs, '%s_length' % key) - indices = _read_array(group, '%s_indices' % key) - return IntIndex(length, indices) - - def _write_multi_index(self, group, key, index): - setattr(group._v_attrs, '%s_nlevels' % key, index.nlevels) - - for i, (lev, lab, name) in enumerate(zip(index.levels, - index.labels, - index.names)): - # write the level - level_key = '%s_level%d' % (key, i) - conv_level = _convert_index(lev).set_name(level_key) - self._write_array(group, level_key, conv_level.values) - node = getattr(group, level_key) - node._v_attrs.kind = conv_level.kind - node._v_attrs.name = name - - # write the name - setattr(node._v_attrs, '%s_name%d' % (key, i), name) - - # write the labels - label_key = '%s_label%d' % (key, i) - self._write_array(group, label_key, lab) - - def _read_multi_index(self, group, key): - nlevels = getattr(group._v_attrs, '%s_nlevels' % key) - - levels = [] - labels = [] - names = [] - for i in range(nlevels): - level_key = '%s_level%d' % (key, i) - name, lev = self._read_index_node(getattr(group, level_key)) - levels.append(lev) - names.append(name) - - label_key = '%s_label%d' % (key, i) - lab = _read_array(group, label_key) - labels.append(lab) - - return MultiIndex(levels=levels, labels=labels, names=names) - - def _read_index_node(self, node): - data = node[:] - kind = node._v_attrs.kind - name = None - - if 'name' in node._v_attrs: - name = node._v_attrs.name - - index_class = _alias_to_class(getattr(node._v_attrs, - 'index_class', '')) - factory = _get_index_factory(index_class) - - kwargs = {} 
- if 'freq' in node._v_attrs: - kwargs['freq'] = node._v_attrs['freq'] - - if 'tz' in node._v_attrs: - kwargs['tz'] = node._v_attrs['tz'] - - if kind in ('date', 'datetime'): - index = factory(_unconvert_index(data, kind), dtype=object, - **kwargs) - else: - index = factory(_unconvert_index(data, kind), **kwargs) - - index.name = name - - return name, index - - def _write_array(self, group, key, value): - if key in group: - self.handle.removeNode(group, key) - - # Transform needed to interface with pytables row/col notation - empty_array = any(x == 0 for x in value.shape) - transposed = False - - if not empty_array: - value = value.T - transposed = True - - if self.filters is not None: - atom = None - try: - # get the atom for this datatype - atom = _tables().Atom.from_dtype(value.dtype) - except ValueError: - pass - - if atom is not None: - # create an empty chunked array and fill it from value - ca = self.handle.createCArray(group, key, atom, - value.shape, - filters=self.filters) - ca[:] = value - getattr(group, key)._v_attrs.transposed = transposed - return - - if value.dtype.type == np.object_: - vlarr = self.handle.createVLArray(group, key, - _tables().ObjectAtom()) - vlarr.append(value) - elif value.dtype.type == np.datetime64: - self.handle.createArray(group, key, value.view('i8')) - getattr(group, key)._v_attrs.value_type = 'datetime64' - else: - if empty_array: - # ugly hack for length 0 axes - arr = np.empty((1,) * value.ndim) - self.handle.createArray(group, key, arr) - getattr(group, key)._v_attrs.value_type = str(value.dtype) - getattr(group, key)._v_attrs.shape = value.shape - else: - self.handle.createArray(group, key, value) - - getattr(group, key)._v_attrs.transposed = transposed - - def _read_group(self, group, where=None, **kwargs): - kind = group._v_attrs.pandas_type - kind = _LEGACY_MAP.get(kind, kind) - handler = self._get_handler(op='read', kind=kind) - return handler(group, where=where, **kwargs) - - def _read_series(self, group, where=None, **kwargs): - index = self._read_index(group, 'index') - if len(index) > 0: - values = _read_array(group, 'values') + if not s.is_exists: + s.set_info() else: - values = [] - - name = getattr(group._v_attrs, 'name', None) - return Series(values, index=index, name=name) + s.set_info() - def _read_legacy_series(self, group, where=None, **kwargs): - index = self._read_index_legacy(group, 'index') - values = _read_array(group, 'values') - return Series(values, index=index) + if not s.is_table and complib: + raise ValueError('Compression not supported on non-table') - def _read_legacy_frame(self, group, where=None, **kwargs): - index = self._read_index_legacy(group, 'index') - columns = self._read_index_legacy(group, 'columns') - values = _read_array(group, 'values') - return DataFrame(values, index=index, columns=columns) + s.write(obj = value, append=append, complib=complib, **kwargs) + if s.is_table and index: + s.create_index(columns = index) - def _read_index_legacy(self, group, key): - node = getattr(group, key) - data = node[:] - kind = node._v_attrs.kind - return _unconvert_index_legacy(data, kind) + def _read_group(self, group, **kwargs): + s = self._create_storer(group) + s.infer_axes() + return s.read(**kwargs) class IndexCol(object): @@ -1509,10 +1186,9 @@ def get_atom_data(self, block): def get_atom_datetime64(self, block): return _tables().Int64Col() - -class Table(object): - """ represent a table: - facilitate read/write of various types of tables +class Storer(object): + """ represent an object in my store + 
facilitate read/write of various types of objects this is an abstract base class Parameters @@ -1520,31 +1196,24 @@ class Table(object): parent : my parent HDFStore group : the group node where the table resides - - Attrs in Table Node - ------------------- - These are attributes that are store in the main table node, they are necessary - to recreate these tables when read back in. - - index_axes : a list of tuples of the (original indexing axis and index column) - non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis) - values_axes : a list of the columns which comprise the data of this table - data_columns : a list of the columns that we are allowing indexing (these become single columns in values_axes) - nan_rep : the string to use for nan representations for string objects - levels : the names of levels - """ - table_type = None - obj_type = None - ndim = None - levels = 1 + pandas_kind = None + obj_type = None + ndim = None + is_table = False def __init__(self, parent, group, **kwargs): - self.parent = parent - self.group = group + self.parent = parent + self.group = group + self.set_version() + + @property + def is_old_version(self): + return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 - # compute our version - version = getattr(group._v_attrs, 'pandas_version', None) + def set_version(self): + """ compute and set our version """ + version = getattr(self.group._v_attrs,'pandas_version',None) try: self.version = tuple([int(x) for x in version.split('.')]) if len(self.version) == 2: @@ -1552,21 +1221,6 @@ def __init__(self, parent, group, **kwargs): except: self.version = (0, 0, 0) - self.index_axes = [] - self.non_index_axes = [] - self.values_axes = [] - self.data_columns = [] - self.nan_rep = None - self.selection = None - - @property - def is_old_version(self): - return self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1 - - @property - def table_type_short(self): - return self.table_type.split('_')[0] - @property def pandas_type(self): return getattr(self.group._v_attrs, 'pandas_type', None) @@ -1574,58 +1228,31 @@ def pandas_type(self): def __repr__(self): """ return a pretty representatgion of myself """ self.infer_axes() - dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else '' - ver = '' - if self.is_old_version: - ver = "[%s]" % '.'.join([ str(x) for x in self.version ]) - - return "%s%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type, - ver, - self.table_type_short, - self.nrows, - ','.join([ a.name for a in self.index_axes ]), - dc) + s = self.shape + if s is not None: + return "%-12.12s (shape->%s)" % (self.pandas_type,s) + return self.pandas_type - __str__ = __repr__ + def __str__(self): + return self.__repr__() - def __getitem__(self, c): - """ return the axis for c """ - for a in self.axes: - if c == a.name: - return a - return None + def set_info(self): + """ set my pandas type & version """ + self.attrs.pandas_type = self.pandas_kind + self.attrs.pandas_version = _version + self.set_version() def copy(self): new_self = copy.copy(self) return new_self - def validate(self, other): - """ validate against an existing table """ - if other is None: - return - - if other.table_type != self.table_type: - raise TypeError("incompatible table_type with existing [%s - %s]" % - (other.table_type, self.table_type)) - - for c in ['index_axes', 'non_index_axes', 'values_axes']: - if getattr(self, c, None) != getattr(other, c, None): - raise Exception("invalid 
combinate of [%s] on appending data [%s] vs current table [%s]"
-                            % (c, getattr(self, c, None), getattr(other, c, None)))

     @property
-    def nrows(self):
-        return getattr(self.table, 'nrows', None)
-
-    @property
-    def nrows_expected(self):
-        """ based on our axes, compute the expected nrows """
-        return np.prod([i.cvalues.shape[0] for i in self.index_axes])
+    def shape(self):
+        return self.nrows

     @property
-    def table(self):
-        """ return the table group """
-        return getattr(self.group, 'table', None)
+    def pathname(self):
+        return self.group._v_pathname

     @property
     def handle(self):
@@ -1655,6 +1282,603 @@ def complib(self):
     def attrs(self):
         return self.group._v_attrs

+    def set_attrs(self):
+        """ set our object attributes """
+        pass
+
+    def get_attrs(self):
+        """ get our object attributes """
+        pass
+
+    @property
+    def storable(self):
+        """ return my storable """
+        return self.group
+
+    @property
+    def is_exists(self):
+        return False
+
+    @property
+    def nrows(self):
+        return getattr(self.storable,'nrows',None)
+
+    def validate(self, other):
+        """ validate against an existing storable """
+        if other is None: return
+        return True
+
+    def validate_version(self, where = None):
+        """ are we trying to operate on an old version? """
+        return True
+
+    def infer_axes(self):
+        """ infer the axes of my storer
+              return a boolean indicating if we have a valid storer or not """
+
+        s = self.storable
+        if s is None:
+            return False
+        self.get_attrs()
+        return True
+
+    def read(self, **kwargs):
+        raise NotImplementedError("cannot read on an abstract storer: subclasses should implement")
+
+    def write(self, **kwargs):
+        raise NotImplementedError("cannot write on an abstract storer: subclasses should implement")
+
+    def delete(self, where = None, **kwargs):
+        """ support fully deleting the node in its entirety (only) - where specification must be None """
+        if where is None:
+            self.handle.removeNode(self.group, recursive=True)
+            return None
+
+        raise NotImplementedError("cannot delete on an abstract storer")
+
+class GenericStorer(Storer):
+    """ a generified storer version """
+    _index_type_map    = { DatetimeIndex: 'datetime',
+                           PeriodIndex: 'period'}
+    _reverse_index_map = dict([ (v,k) for k, v in _index_type_map.iteritems() ])
+    attributes = []
+
+    # indexer helpers
+    def _class_to_alias(self, cls):
+        return self._index_type_map.get(cls, '')
+
+    def _alias_to_class(self, alias):
+        if isinstance(alias, type):  # pragma: no cover
+            return alias  # compat: for a short period of time master stored types
+        return self._reverse_index_map.get(alias, Index)
+
+    def _get_index_factory(self, klass):
+        if klass == DatetimeIndex:
+            def f(values, freq=None, tz=None):
+                return DatetimeIndex._simple_new(values, None, freq=freq,
+                                                 tz=tz)
+            return f
+        return klass
+
+    @property
+    def is_exists(self):
+        return True
+
+    def get_attrs(self):
+        """ retrieve our attributes """
+        for n in self.attributes:
+            setattr(self,n,getattr(self.attrs, n, None))
+
+    def read_array(self, key):
+        """ read an array for the specified node (off of the group) """
+        import tables
+        node = getattr(self.group, key)
+        data = node[:]
+        attrs = node._v_attrs
+
+        transposed = getattr(attrs, 'transposed', False)
+
+        if isinstance(node, tables.VLArray):
+            ret = data[0]
+        else:
+            dtype = getattr(attrs, 'value_type', None)
+            shape = getattr(attrs, 'shape', None)
+
+            if shape is not None:
+                # length 0 axis
+                ret = np.empty(shape, dtype=dtype)
+            else:
+                ret = data
+
+            if dtype == 'datetime64':
+                ret = np.array(ret, dtype='M8[ns]')
+
+        if transposed:
+            return ret.T
+
else: + return ret + + def read_index(self, key): + variety = getattr(self.attrs, '%s_variety' % key) + + if variety == 'multi': + return self.read_multi_index(key) + elif variety == 'block': + return self.read_block_index(key) + elif variety == 'sparseint': + return self.read_sparse_intindex(key) + elif variety == 'regular': + _, index = self.read_index_node(getattr(self.group, key)) + return index + else: # pragma: no cover + raise Exception('unrecognized index variety: %s' % variety) + + def write_index(self, key, index): + if isinstance(index, MultiIndex): + setattr(self.attrs, '%s_variety' % key, 'multi') + self.write_multi_index(key, index) + elif isinstance(index, BlockIndex): + setattr(self.attrs, '%s_variety' % key, 'block') + self.write_block_index(key, index) + elif isinstance(index, IntIndex): + setattr(self.attrs, '%s_variety' % key, 'sparseint') + self.write_sparse_intindex(key, index) + else: + setattr(self.attrs, '%s_variety' % key, 'regular') + converted = _convert_index(index).set_name('index') + self.write_array(key, converted.values) + node = getattr(self.group, key) + node._v_attrs.kind = converted.kind + node._v_attrs.name = index.name + + if isinstance(index, (DatetimeIndex, PeriodIndex)): + node._v_attrs.index_class = self._class_to_alias(type(index)) + + if hasattr(index, 'freq'): + node._v_attrs.freq = index.freq + + if hasattr(index, 'tz') and index.tz is not None: + zone = tslib.get_timezone(index.tz) + if zone is None: + zone = tslib.tot_seconds(index.tz.utcoffset()) + node._v_attrs.tz = zone + + + def write_block_index(self, key, index): + self.write_array('%s_blocs' % key, index.blocs) + self.write_array('%s_blengths' % key, index.blengths) + setattr(self.attrs, '%s_length' % key, index.length) + + def read_block_index(self, key): + length = getattr(self.attrs, '%s_length' % key) + blocs = self.read_array('%s_blocs' % key) + blengths = self.read_array('%s_blengths' % key) + return BlockIndex(length, blocs, blengths) + + def write_sparse_intindex(self, key, index): + self.write_array('%s_indices' % key, index.indices) + setattr(self.attrs, '%s_length' % key, index.length) + + def read_sparse_intindex(self, key): + length = getattr(self.attrs, '%s_length' % key) + indices = self.read_array('%s_indices' % key) + return IntIndex(length, indices) + + def write_multi_index(self, key, index): + setattr(self.attrs, '%s_nlevels' % key, index.nlevels) + + for i, (lev, lab, name) in enumerate(zip(index.levels, + index.labels, + index.names)): + # write the level + level_key = '%s_level%d' % (key, i) + conv_level = _convert_index(lev).set_name(level_key) + self.write_array(level_key, conv_level.values) + node = getattr(self.group, level_key) + node._v_attrs.kind = conv_level.kind + node._v_attrs.name = name + + # write the name + setattr(node._v_attrs, '%s_name%d' % (key, i), name) + + # write the labels + label_key = '%s_label%d' % (key, i) + self.write_array(label_key, lab) + + def read_multi_index(self, key): + nlevels = getattr(self.attrs, '%s_nlevels' % key) + + levels = [] + labels = [] + names = [] + for i in range(nlevels): + level_key = '%s_level%d' % (key, i) + name, lev = self.read_index_node(getattr(self.group, level_key)) + levels.append(lev) + names.append(name) + + label_key = '%s_label%d' % (key, i) + lab = self.read_array(label_key) + labels.append(lab) + + return MultiIndex(levels=levels, labels=labels, names=names) + + def read_index_node(self, node): + data = node[:] + kind = node._v_attrs.kind + name = None + + if 'name' in node._v_attrs: + name = 
node._v_attrs.name + + index_class = self._alias_to_class(getattr(node._v_attrs, + 'index_class', '')) + factory = self._get_index_factory(index_class) + + kwargs = {} + if 'freq' in node._v_attrs: + kwargs['freq'] = node._v_attrs['freq'] + + if 'tz' in node._v_attrs: + kwargs['tz'] = node._v_attrs['tz'] + + if kind in ('date', 'datetime'): + index = factory(_unconvert_index(data, kind), dtype=object, + **kwargs) + else: + index = factory(_unconvert_index(data, kind), **kwargs) + + index.name = name + + return name, index + + def write_array(self, key, value): + if key in self.group: + self.handle.removeNode(self.group, key) + + # Transform needed to interface with pytables row/col notation + empty_array = any(x == 0 for x in value.shape) + transposed = False + + if not empty_array: + value = value.T + transposed = True + + if self.filters is not None: + atom = None + try: + # get the atom for this datatype + atom = _tables().Atom.from_dtype(value.dtype) + except ValueError: + pass + + if atom is not None: + # create an empty chunked array and fill it from value + ca = self.handle.createCArray(self.group, key, atom, + value.shape, + filters=self.filters) + ca[:] = value + getattr(self.group, key)._v_attrs.transposed = transposed + return + + if value.dtype.type == np.object_: + vlarr = self.handle.createVLArray(self.group, key, + _tables().ObjectAtom()) + vlarr.append(value) + elif value.dtype.type == np.datetime64: + self.handle.createArray(self.group, key, value.view('i8')) + getattr(self.group, key)._v_attrs.value_type = 'datetime64' + else: + if empty_array: + # ugly hack for length 0 axes + arr = np.empty((1,) * value.ndim) + self.handle.createArray(self.group, key, arr) + getattr(self.group, key)._v_attrs.value_type = str(value.dtype) + getattr(self.group, key)._v_attrs.shape = value.shape + else: + self.handle.createArray(self.group, key, value) + + getattr(self.group, key)._v_attrs.transposed = transposed + +class LegacyStorer(GenericStorer): + + def read_index_legacy(self, key): + node = getattr(self.group,key) + data = node[:] + kind = node._v_attrs.kind + return _unconvert_index_legacy(data, kind) + +class LegacySeriesStorer(LegacyStorer): + + def read(self, **kwargs): + index = self.read_index_legacy('index') + values = self.read_array('values') + return Series(values, index=index) + +class LegacyFrameStorer(LegacyStorer): + + def read(self, **kwargs): + index = self.read_index_legacy('index') + columns = self.read_index_legacy('columns') + values = self.read_array('values') + return DataFrame(values, index=index, columns=columns) + +class SeriesStorer(GenericStorer): + pandas_kind = 'series' + attributes = ['name'] + + @property + def shape(self): + try: + return "[%s]" % len(getattr(self.group,'values',None)) + except: + return None + + def read(self, **kwargs): + index = self.read_index('index') + if len(index) > 0: + values = self.read_array('values') + else: + values = [] + + return Series(values, index=index, name=self.name) + + def write(self, obj, **kwargs): + self.write_index('index', obj.index) + self.write_array('values', obj.values) + self.attrs.name = obj.name + +class SparseSeriesStorer(GenericStorer): + pandas_kind = 'sparse_series' + attributes = ['name','fill_value','kind'] + + def read(self, **kwargs): + index = self.read_index('index') + sp_values = self.read_array('sp_values') + sp_index = self.read_index('sp_index') + return SparseSeries(sp_values, index=index, sparse_index=sp_index, + kind=self.kind or 'block', fill_value=self.fill_value, + name=self.name) 
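# Aside: a hedged, self-contained sketch of the tag-and-dispatch pattern that
# the write_index/read_index pair above relies on. The writer records a small
# '<key>_variety' tag next to the data and the reader picks the matching
# decoder off that tag; the plain dict here is a stand-in for the PyTables
# attribute node, and the 'regular' codec is purely illustrative.
writers = {'regular': lambda values: list(values)}
readers = {'regular': lambda data: list(data)}

def write_tagged(attrs, key, variety, payload):
    attrs['%s_variety' % key] = variety     # tag how the payload was stored
    attrs[key] = writers[variety](payload)  # encode with that variety's writer

def read_tagged(attrs, key):
    variety = attrs['%s_variety' % key]     # recover the tag
    return readers[variety](attrs[key])     # decode with the matching reader

attrs = {}
write_tagged(attrs, 'index', 'regular', [1, 2, 3])
assert read_tagged(attrs, 'index') == [1, 2, 3]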
+
+    def write(self, obj, **kwargs):
+        self.write_index('index', obj.index)
+        self.write_index('sp_index', obj.sp_index)
+        self.write_array('sp_values', obj.sp_values)
+        self.attrs.name = obj.name
+        self.attrs.fill_value = obj.fill_value
+        self.attrs.kind = obj.kind
+
+class SparseFrameStorer(GenericStorer):
+    pandas_kind = 'sparse_frame'
+    attributes = ['default_kind','default_fill_value']
+
+    def read(self, **kwargs):
+        columns = self.read_index('columns')
+        sdict = {}
+        for c in columns:
+            key = 'sparse_series_%s' % c
+            s = SparseSeriesStorer(self.parent, getattr(self.group,key))
+            s.infer_axes()
+            sdict[c] = s.read()
+        return SparseDataFrame(sdict, columns=columns,
+                               default_kind=self.default_kind,
+                               default_fill_value=self.default_fill_value)
+
+    def write(self, obj, **kwargs):
+        """ write it as a collection of individual sparse series """
+        for name, ss in obj.iteritems():
+            key = 'sparse_series_%s' % name
+            if key not in self.group._v_children:
+                node = self.handle.createGroup(self.group, key)
+            else:
+                node = getattr(self.group, key)
+            s = SparseSeriesStorer(self.parent, node)
+            s.write(ss)
+        self.attrs.default_fill_value = obj.default_fill_value
+        self.attrs.default_kind = obj.default_kind
+        self.write_index('columns', obj.columns)
+
+class SparsePanelStorer(GenericStorer):
+    pandas_kind = 'sparse_panel'
+    attributes = ['default_kind','default_fill_value']
+
+    def read(self, **kwargs):
+        items = self.read_index('items')
+
+        sdict = {}
+        for name in items:
+            key = 'sparse_frame_%s' % name
+            node = getattr(self.group, key)
+            s = SparseFrameStorer(self.parent, node)
+            s.infer_axes()
+            sdict[name] = s.read()
+        return SparsePanel(sdict, items=items, default_kind=self.default_kind,
+                           default_fill_value=self.default_fill_value)
+
+    def write(self, obj, **kwargs):
+        self.attrs.default_fill_value = obj.default_fill_value
+        self.attrs.default_kind = obj.default_kind
+        self.write_index('items', obj.items)
+
+        for name, sdf in obj.iteritems():
+            key = 'sparse_frame_%s' % name
+            if key not in self.group._v_children:
+                node = self.handle.createGroup(self.group, key)
+            else:
+                node = getattr(self.group, key)
+            s = SparseFrameStorer(self.parent, node)
+            s.write(sdf)
+
+class BlockManagerStorer(GenericStorer):
+    attributes = ['ndim','nblocks']
+    is_shape_reversed = False
+
+    @property
+    def shape(self):
+        try:
+            ndim = self.ndim
+
+            # items
+            items = 0
+            for i in range(self.nblocks):
+                node = getattr(self.group, 'block%d_items' % i)
+                shape = getattr(node,'shape',None)
+                if shape is not None:
+                    items += shape[0]
+
+            # data shape
+            node = getattr(self.group, 'block0_values')
+            shape = getattr(node,'shape',None)
+            if shape is not None:
+                shape = list(shape[0:(ndim-1)])
+            else:
+                shape = []
+
+            shape.append(items)
+
+            # hacky - this works for frames, but is reversed for panels
+            if self.is_shape_reversed:
+                shape = shape[::-1]
+
+            return "[%s]" % ','.join([ str(x) for x in shape ])
+        except:
+            return None
+
+    def read(self, **kwargs):
+        axes = []
+        for i in xrange(self.ndim):
+            ax = self.read_index('axis%d' % i)
+            axes.append(ax)
+
+        items = axes[0]
+        blocks = []
+        for i in range(self.nblocks):
+            blk_items = self.read_index('block%d_items' % i)
+            values = self.read_array('block%d_values' % i)
+            blk = make_block(values, blk_items, items)
+            blocks.append(blk)
+
+        return self.obj_type(BlockManager(blocks, axes))
+
+    def write(self, obj, **kwargs):
+        data = obj._data
+        if not data.is_consolidated():
+            data = data.consolidate()
+
+        self.attrs.ndim = data.ndim
+        for i, ax in
enumerate(data.axes):
+            self.write_index('axis%d' % i, ax)
+
+        # Supporting mixed-type DataFrame objects...nontrivial
+        self.attrs.nblocks = nblocks = len(data.blocks)
+        for i in range(nblocks):
+            blk = data.blocks[i]
+            # I have no idea why, but writing values before items fixed #2299
+            self.write_array('block%d_values' % i, blk.values)
+            self.write_index('block%d_items' % i, blk.items)
+
+class FrameStorer(BlockManagerStorer):
+    pandas_kind = 'frame'
+    obj_type = DataFrame
+
+class PanelStorer(BlockManagerStorer):
+    pandas_kind = 'wide'
+    obj_type = Panel
+    is_shape_reversed = True
+
+    def write(self, obj, **kwargs):
+        obj._consolidate_inplace()
+        return super(PanelStorer, self).write(obj, **kwargs)
+
+class Table(Storer):
+    """ represent a table:
+          facilitate read/write of various types of tables
+
+        Attrs in Table Node
+        -------------------
+        These are attributes that are stored in the main table node, they are necessary
+        to recreate these tables when read back in.
+
+        index_axes    : a list of tuples of the (original indexing axis and index column)
+        non_index_axes: a list of tuples of the (original index axis and columns on a non-indexing axis)
+        values_axes   : a list of the columns which comprise the data of this table
+        data_columns  : a list of the columns that we are allowing indexing (these become single columns in values_axes)
+        nan_rep       : the string to use for nan representations for string objects
+        levels        : the names of levels
+
+        """
+    pandas_kind = 'wide_table'
+    table_type = None
+    levels = 1
+    is_table = True
+
+    def __init__(self, *args, **kwargs):
+        super(Table, self).__init__(*args, **kwargs)
+        self.index_axes = []
+        self.non_index_axes = []
+        self.values_axes = []
+        self.data_columns = []
+        self.nan_rep = None
+        self.selection = None
+
+    @property
+    def table_type_short(self):
+        return self.table_type.split('_')[0]
+
+    def __repr__(self):
+        """ return a pretty representation of myself """
+        self.infer_axes()
+        dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else ''
+
+        ver = ''
+        if self.is_old_version:
+            ver = "[%s]" % '.'.join([ str(x) for x in self.version ])
+
+        return "%-12.12s%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type,
+                                                                    ver,
+                                                                    self.table_type_short,
+                                                                    self.nrows,
+                                                                    ','.join([ a.name for a in self.index_axes ]),
+                                                                    dc)
+
+    def __getitem__(self, c):
+        """ return the axis for c """
+        for a in self.axes:
+            if c == a.name:
+                return a
+        return None
+
+    def validate(self, other):
+        """ validate against an existing table """
+        if other is None: return
+
+        if other.table_type != self.table_type:
+            raise TypeError("incompatible table_type with existing [%s - %s]" %
+                            (other.table_type, self.table_type))
+
+        for c in ['index_axes','non_index_axes','values_axes']:
+            if getattr(self,c,None) != getattr(other,c,None):
+                raise Exception("invalid combination of [%s] on appending data [%s] vs current table [%s]" % (c,getattr(self,c,None),getattr(other,c,None)))
+
+    @property
+    def nrows_expected(self):
+        """ based on our axes, compute the expected nrows """
+        return np.prod([ i.cvalues.shape[0] for i in self.index_axes ])
+
+    @property
+    def is_exists(self):
+        """ has this table been created """
+        return 'table' in self.group
+
+    @property
+    def storable(self):
+        return getattr(self.group,'table',None)
+
+    @property
+    def table(self):
+        """ return the table group (this is my storable) """
+        return self.storable
+
     @property
     def description(self):
         return self.table.description
@@ -1699,7 +1923,17 @@ def set_attrs(self):
         self.attrs.nan_rep =
self.nan_rep self.attrs.levels = self.levels - def validate_version(self, where=None): + def get_attrs(self): + """ retrieve our attributes """ + self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] + self.data_columns = getattr(self.attrs,'data_columns',None) or [] + self.nan_rep = getattr(self.attrs,'nan_rep',None) + self.levels = getattr(self.attrs,'levels',None) or [] + t = self.table + self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ] + self.values_axes = [ a.infer(t) for a in self.indexables if not a.is_an_indexable ] + + def validate_version(self, where = None): """ are we trying to operate on an old version? """ if where is not None: if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: @@ -1812,27 +2046,6 @@ def read_axes(self, where, **kwargs): return True - def infer_axes(self): - """ infer the axes from the indexables: - return a boolean indicating if we have a valid table or not """ - - table = self.table - if table is None: - return False - - self.non_index_axes = getattr( - self.attrs, 'non_index_axes', None) or [] - self.data_columns = getattr( - self.attrs, 'data_columns', None) or [] - self.nan_rep = getattr(self.attrs, 'nan_rep', None) - self.levels = getattr( - self.attrs, 'levels', None) or [] - self.index_axes = [a.infer( - self.table) for a in self.indexables if a.is_an_indexable] - self.values_axes = [a.infer( - self.table) for a in self.indexables if not a.is_an_indexable] - return True - def get_object(self, obj): """ return the data for this obj """ return obj @@ -1852,13 +2065,18 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, """ + # set the default axes if needed + if axes is None: + axes = _AXES_MAP[type(obj)] + # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] # do we have an existing table (if so, use its axes & data_columns) if self.infer_axes(): existing_table = self.copy() - axes = [a.axis for a in existing_table.index_axes] + existing_table.infer_axes() + axes = [ a.axis for a in existing_table.index_axes] data_columns = existing_table.data_columns nan_rep = existing_table.nan_rep else: @@ -2022,10 +2240,6 @@ def create_description(self, complib=None, complevel=None, fletcher32=False, exp return d - def read(self, **kwargs): - raise NotImplementedError( - "cannot read on an abstract table: subclasses should implement") - def read_coordinates(self, where=None, **kwargs): """ select coordinates (row numbers) from a table; return the coordinates object """ @@ -2063,18 +2277,6 @@ def read_column(self, column, **kwargs): raise KeyError("column [%s] not found in the table" % column) - def write(self, **kwargs): - raise NotImplementedError("cannot write on an abstract table") - - def delete(self, where=None, **kwargs): - """ support fully deleting the node in its entirety (only) - where specification must be None """ - if where is None: - self.handle.removeNode(self.group, recursive=True) - return None - - raise NotImplementedError("cannot delete on an abstract table") - - class WORMTable(Table): """ a write-once read-many table: this format DOES NOT ALLOW appending to a table. 
writing is a one-time operation the data are stored in a format @@ -2195,6 +2397,7 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ + pandas_kind = 'frame_table' table_type = 'legacy_frame' obj_type = Panel @@ -2217,16 +2420,14 @@ def write(self, axes, obj, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=50000, expectedrows=None, **kwargs): - # create the table if it doesn't exist (or get it if it does) - if not append: - if 'table' in self.group: - self.handle.removeNode(self.group, 'table') + if not append and self.is_exists: + self.handle.removeNode(self.group, 'table') # create the axes self.create_axes(axes=axes, obj=obj, validate=append, min_itemsize=min_itemsize, **kwargs) - if 'table' not in self.group: + if not self.is_exists: # create the table options = self.create_description(complib=complib, @@ -2362,6 +2563,7 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ + pandas_kind = 'frame_table' table_type = 'appendable_frame' ndim = 2 obj_type = DataFrame @@ -2462,50 +2664,6 @@ class AppendableNDimTable(AppendablePanelTable): ndim = 4 obj_type = Panel4D -# table maps -_TABLE_MAP = { - 'appendable_frame': AppendableFrameTable, - 'appendable_multiframe': AppendableMultiFrameTable, - 'appendable_panel': AppendablePanelTable, - 'appendable_ndim': AppendableNDimTable, - 'worm': WORMTable, - 'legacy_frame': LegacyFrameTable, - 'legacy_panel': LegacyPanelTable, - 'default': AppendablePanelTable, -} - - -def create_table(parent, group, typ=None, **kwargs): - """ return a suitable Table class to operate """ - - pt = getattr(group._v_attrs, 'pandas_type', None) - tt = getattr(group._v_attrs, 'table_type', None) or typ - - # a new node - if pt is None: - - return (_TABLE_MAP.get(typ) or _TABLE_MAP.get('default'))(parent, group, **kwargs) - - # existing node (legacy) - if tt is None: - - # distiguish between a frame/table - tt = 'legacy_panel' - try: - fields = group.table._v_attrs.fields - if len(fields) == 1 and fields[0] == 'value': - tt = 'legacy_frame' - except: - pass - - return _TABLE_MAP.get(tt)(parent, group, **kwargs) - - -def _itemsize_string_array(arr): - """ return the maximum size of elements in a strnig array """ - return max([str_len(arr[v].ravel()).max() for v in range(arr.shape[0])]) - - def _convert_index(index): if isinstance(index, DatetimeIndex): converted = index.asi8 @@ -2554,36 +2712,6 @@ def _convert_index(index): atom = _tables().ObjectAtom() return IndexCol(np.asarray(values, dtype='O'), 'object', atom) - -def _read_array(group, key): - import tables - node = getattr(group, key) - data = node[:] - attrs = node._v_attrs - - transposed = getattr(attrs, 'transposed', False) - - if isinstance(node, tables.VLArray): - ret = data[0] - else: - dtype = getattr(attrs, 'value_type', None) - shape = getattr(attrs, 'shape', None) - - if shape is not None: - # length 0 axis - ret = np.empty(shape, dtype=dtype) - else: - ret = data - - if dtype == 'datetime64': - ret = np.array(ret, dtype='M8[ns]') - - if transposed: - return ret.T - else: - return ret - - def _unconvert_index(data, kind): if kind == 'datetime64': index = DatetimeIndex(data) @@ -2600,7 +2728,6 @@ def _unconvert_index(data, kind): raise ValueError('unrecognized index type %s' % kind) return index - def _unconvert_index_legacy(data, kind, legacy=False): if kind == 'datetime': index = 
lib.time64_to_datetime(data) @@ -2610,7 +2737,6 @@ def _unconvert_index_legacy(data, kind, legacy=False): raise ValueError('unrecognized index type %s' % kind) return index - def _maybe_convert(values, val_kind): if _need_convert(val_kind): conv = _get_converter(val_kind) @@ -2618,7 +2744,6 @@ def _maybe_convert(values, val_kind): values = conv(values) return values - def _get_converter(kind): if kind == 'datetime64': return lambda x: np.array(x, dtype='M8[ns]') @@ -2627,38 +2752,11 @@ def _get_converter(kind): else: # pragma: no cover raise ValueError('invalid kind %s' % kind) - def _need_convert(kind): if kind in ('datetime', 'datetime64'): return True return False - -def _is_table_type(group): - try: - return 'table' in group._v_attrs.pandas_type - except AttributeError: - # new node, e.g. - return False - -_index_type_map = {DatetimeIndex: 'datetime', - PeriodIndex: 'period'} - -_reverse_index_map = {} -for k, v in _index_type_map.iteritems(): - _reverse_index_map[v] = k - - -def _class_to_alias(cls): - return _index_type_map.get(cls, '') - - -def _alias_to_class(alias): - if isinstance(alias, type): # pragma: no cover - return alias # compat: for a short period of time master stored types - return _reverse_index_map.get(alias, Index) - - class Term(object): """ create a term object that holds a field, op, and value @@ -2826,7 +2924,7 @@ def eval(self): def convert_value(self, v): """ convert the expression that is in the term to something that is accepted by pytables """ - if self.kind == 'datetime64': + if self.kind == 'datetime64' or self.kind == 'datetime' : return [lib.Timestamp(v).value, None] elif isinstance(v, datetime) or hasattr(v, 'timetuple') or self.kind == 'date': return [time.mktime(v.timetuple()), None] @@ -2932,10 +3030,3 @@ def select_coords(self): return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True) -def _get_index_factory(klass): - if klass == DatetimeIndex: - def f(values, freq=None, tz=None): - return DatetimeIndex._simple_new(values, None, freq=freq, - tz=tz) - return f - return klass diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 4ce89057a1a46..ebf5b25a4a73c 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -73,6 +73,22 @@ def test_repr(self): self.store['d'] = tm.makePanel() self.store['foo/bar'] = tm.makePanel() self.store.append('e', tm.makePanel()) + + df = tm.makeDataFrame() + df['obj1'] = 'foo' + df['obj2'] = 'bar' + df['bool1'] = df['A'] > 0 + df['bool2'] = df['B'] > 0 + df['bool3'] = True + df['int1'] = 1 + df['int2'] = 2 + df['timestamp1'] = Timestamp('20010102') + df['timestamp2'] = Timestamp('20010103') + df['datetime1'] = datetime.datetime(2001,1,2,0,0) + df['datetime2'] = datetime.datetime(2001,1,3,0,0) + df.ix[3:6,['obj1']] = np.nan + df = df.consolidate().convert_objects() + self.store['df'] = df repr(self.store) str(self.store) @@ -172,11 +188,11 @@ def test_put(self): # node does not currently exist, test _is_table_type returns False in # this case - self.assertRaises( - ValueError, self.store.put, 'f', df[10:], append=True) + #self.store.remove('f') + #self.assertRaises(ValueError, self.store.put, 'f', df[10:], append=True) - # OK - self.store.put('c', df[10:], append=True) + # can't put to a table (use append instead) + self.assertRaises(ValueError, self.store.put, 'c', df[10:], append=True) # overwrite table self.store.put('c', df[:10], table=True, append=False) @@ -398,9 +414,8 @@ def test_append_with_strings(self): wp2 
= wp.rename_axis( dict([(x, "%s_extra" % x) for x in wp.minor_axis]), axis=2) - def check_col(key, name, size): - self.assert_(getattr(self.store.get_table( - key).table.description, name).itemsize == size) + def check_col(key,name,size): + self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size) self.store.append('s1', wp, min_itemsize=20) self.store.append('s1', wp2) @@ -499,9 +514,8 @@ def test_append_with_data_columns(self): tm.assert_frame_equal(result, expected) # using min_itemsize and a data column - def check_col(key, name, size): - self.assert_(getattr(self.store.get_table( - key).table.description, name).itemsize == size) + def check_col(key,name,size): + self.assert_(getattr(self.store.get_storer(key).table.description,name).itemsize == size) self.store.remove('df') self.store.append('df', df_new, data_columns=['string'], @@ -575,8 +589,8 @@ def check_col(key, name, size): def test_create_table_index(self): - def col(t, column): - return getattr(self.store.get_table(t).table.cols, column) + def col(t,column): + return getattr(self.store.get_storer(t).table.cols,column) # index=False wp = tm.makePanel() @@ -626,6 +640,7 @@ def col(t, column): assert(col('f2', 'string2').is_indexed is False) # try to index a non-table + self.store.remove('f2') self.store.put('f2', df) self.assertRaises(Exception, self.store.create_table_index, 'f2') @@ -1421,6 +1436,14 @@ def test_select(self): expected = df[df.A > 0].reindex(columns=['C', 'D']) tm.assert_frame_equal(expected, result) + # with a Timestamp data column (GH #2637) + df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300))) + self.store.remove('df') + self.store.append('df', df, data_columns=['ts', 'A']) + result = self.store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))]) + expected = df[df.ts >= Timestamp('2012-02-01')] + tm.assert_frame_equal(expected, result) + def test_panel_select(self): wp = tm.makePanel() self.store.put('wp', wp, table=True) @@ -1760,8 +1783,8 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): # check indicies & nrows for k in tstore.keys(): if tstore.is_table(k): - new_t = tstore.get_table(k) - orig_t = store.get_table(k) + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) self.assert_(orig_t.nrows == new_t.nrows) for a in orig_t.axes: From 38debb7f69098df73ac30dda7dae316bd6afe77c Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 4 Jan 2013 20:16:55 -0500 Subject: [PATCH 08/12] TST: more tests, added ncols to table output in printing --- pandas/io/pytables.py | 41 ++++++++++++++++++++------------ pandas/io/tests/test_pytables.py | 18 ++++++++++++++ 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 73db793069817..60c91e2d50e83 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -233,8 +233,8 @@ def __repr__(self): for k in self.keys(): s = self.get_storer(k) if s is not None: - keys.append(str(s.pathname)) - values.append(str(s)) + keys.append(str(s.pathname or k)) + values.append(str(s or 'invalid_HDFStore node')) output += adjoin(12, keys, values) else: @@ -671,15 +671,22 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None def _create_storer(self, group, value = None, table = False, append = False, **kwargs): """ return a suitable Storer class to operate """ + def error(t): + raise Exception("cannot properly create the storer for: [%s] 
[group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
+                            (t,group,type(value),table,append,kwargs))
+
         pt = getattr(group._v_attrs,'pandas_type',None)
         tt = getattr(group._v_attrs,'table_type',None)

-        # infer the typ from the passed value
+        # infer the pt from the passed value
         if pt is None:
             if value is None:
                 raise Exception("cannot create a storer if the object does not exist and no value is passed")

-            pt = _TYPE_MAP[type(value)]
+            try:
+                pt = _TYPE_MAP[type(value)]
+            except:
+                error('_TYPE_MAP')

             # we are actually a table
             if table or append:
                 pt += '_table'
@@ -690,8 +697,7 @@ def _create_storer(self, group, value = None, table = False, append = False, **k
             try:
                 return globals()[_STORER_MAP[pt]](self, group, **kwargs)
             except:
-                raise Exception("cannot properly create the storer for: [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
-                                (group,value,table,append,kwargs))
+                error('_STORER_MAP')

         # existing node (and must be a table)
         if tt is None:
@@ -718,10 +724,9 @@ def _create_storer(self, group, value = None, table = False, append = False, **k
                     pass

         try:
-            return globals()[_TABLE_MAP[tt or 'appendable_panel']](self, group, **kwargs)
+            return globals()[_TABLE_MAP[tt]](self, group, **kwargs)
         except:
-            raise Exception("cannot properly create the storer for: [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
-                            (group,value,table,append,kwargs))
+            error('_TABLE_MAP')

     def _write_to_group(self, key, value, index=True, table=False, append=False, complib=None, **kwargs):
         group = self.get_node(key)
@@ -1834,12 +1839,13 @@ def __repr__(self):
         if self.is_old_version:
             ver = "[%s]" % '.'.join([ str(x) for x in self.version ])

-        return "%-12.12s%s (typ->%s,nrows->%s,indexers->[%s]%s)" % (self.pandas_type,
-                                                                    ver,
-                                                                    self.table_type_short,
-                                                                    self.nrows,
-                                                                    ','.join([ a.name for a in self.index_axes ]),
-                                                                    dc)
+        return "%-12.12s%s (typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % (self.pandas_type,
+                                                                              ver,
+                                                                              self.table_type_short,
+                                                                              self.nrows,
+                                                                              self.ncols,
+                                                                              ','.join([ a.name for a in self.index_axes ]),
+                                                                              dc)

     def __getitem__(self, c):
         """ return the axis for c """
@@ -1887,6 +1893,11 @@ def description(self):
     def axes(self):
         return itertools.chain(self.index_axes, self.values_axes)

+    @property
+    def ncols(self):
+        """ the number of total columns in the values axes """
+        return sum([ len(a.values) for a in self.values_axes ])
+
     @property
     def is_transposed(self):
         return False
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index ebf5b25a4a73c..29a4ec54b69ee 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -89,6 +89,10 @@ def test_repr(self):
         df.ix[3:6,['obj1']] = np.nan
         df = df.consolidate().convert_objects()
         self.store['df'] = df
+
+        # make a random group in hdf space
+        self.store.handle.createGroup(self.store.handle.root,'bah')
+
         repr(self.store)
         str(self.store)
@@ -776,6 +780,20 @@ def test_append_hierarchical(self):

     def test_append_misc(self):

+        # unsupported data types for non-tables
+        p4d = tm.makePanel4D()
+        self.assertRaises(Exception, self.store.put,'p4d',p4d)
+
+        # unsupported data type for table
+        s = tm.makeStringSeries()
+        self.assertRaises(Exception, self.store.append,'s',s)
+
+        # unsupported data types
+        self.assertRaises(Exception, self.store.put,'abc',None)
+        self.assertRaises(Exception, self.store.put,'abc','123')
+        self.assertRaises(Exception, self.store.put,'abc',123)
+        self.assertRaises(Exception, self.store.put,'abc',np.arange(5))
+
         df = tm.makeDataFrame()
         self.store.append('df', df, chunksize=1)
         result =
self.store.select('df') From 97f96d72f4686c45bf71f72a07947f0b55b5283c Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 4 Jan 2013 20:21:50 -0500 Subject: [PATCH 09/12] BLD: rebased - for some reason was not up to date --- pandas/io/pytables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 60c91e2d50e83..f47d348594573 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4,7 +4,6 @@ """ # pylint: disable-msg=E1101,W0613,W0603 - from datetime import datetime, date import time import re From 7964cd538577e7171d73463c25986bdffe58757b Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 5 Jan 2013 15:57:40 -0500 Subject: [PATCH 10/12] BLD: merged in #2632 locally, resolved a couple of issues --- pandas/io/pytables.py | 2 +- pandas/io/tests/test_pytables.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f47d348594573..80232689fbdbb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2426,7 +2426,7 @@ class AppendableTable(LegacyTable): _indexables = None table_type = 'appendable' - def write(self, axes, obj, append=False, complib=None, + def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=50000, expectedrows=None, **kwargs): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 29a4ec54b69ee..8efc0a37a34f2 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -609,7 +609,7 @@ def col(t,column): assert(col('p5i', 'minor_axis').is_indexed is True) # default optlevels - self.store.get_table('p5').create_index() + self.store.get_storer('p5').create_index() assert(col('p5', 'major_axis').index.optlevel == 6) assert(col('p5', 'minor_axis').index.kind == 'medium') From a6b7585ba15427414dbbc384dceff92be3be4372 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 5 Jan 2013 16:56:46 -0500 Subject: [PATCH 11/12] TST: ignore remove file errors --- pandas/io/tests/test_pytables.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 8efc0a37a34f2..274087605468b 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -37,7 +37,10 @@ def setUp(self): def tearDown(self): self.store.close() - os.remove(self.path) + try: + os.remove(self.path) + except: + pass def test_factory_fun(self): try: From 4a455124de116d0b7178c2b1baeac95086ba72b1 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 5 Jan 2013 17:47:03 -0500 Subject: [PATCH 12/12] BLD: fixes error in tests on amd64 (because of open file handle) --- pandas/io/tests/test_pytables.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 274087605468b..cfa0f2e2bfe05 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1818,7 +1818,10 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): store.close() tstore.close() import os - os.remove(new_f) + try: + os.remove(new_f) + except: + pass do_copy() do_copy(keys = ['df'])
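A closing note on usage: the series leaves HDFStore.copy (which grew out of the
original copy_to) and get_storer (which replaces get_table) as the public
surface for upgrading old-format files. A minimal sketch against the API as it
stands at the end of the series; the file names are illustrative and the
snippet assumes a readable legacy store:

    from pandas.io.pytables import HDFStore

    # open the old-format store read-only, then copy it to a new, upgraded
    # file; copy() returns the handle of the new store
    store = HDFStore('legacy.h5', mode='r')
    new_store = store.copy('upgraded.h5', propindexes=True, overwrite=True)

    # get_storer() replaces the old get_table(); inspect each copied node
    for k in new_store.keys():
        s = new_store.get_storer(k)
        print k, s.is_table, s.nrows

    new_store.close()
    store.close()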