Skip to content

Commit 395203f

Browse files
committed
BUG: updated the copy method to work with supplied options
1 parent bda1cbe commit 395203f

File tree

4 files changed

+107
-30
lines changed

4 files changed

+107
-30
lines changed

RELEASE.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ pandas 0.10.1
5050
to do multiple-table append/selection
5151
- added support for datetime64 in columns
5252
- added method ``unique`` to select the unique values in an indexable or data column
53-
- added method ``copy_to`` to copy an existing store (and possibly upgrade)
53+
- added method ``copy`` to copy an existing store (and possibly upgrade)
5454
- Add ``logx`` option to DataFrame/Series.plot (GH2327_, #2565)
5555
- Support reading gzipped data from file-like object
5656
- ``pivot_table`` aggfunc can be anything used in GroupBy.aggregate (GH2643_)

doc/source/io.rst

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,7 +1343,7 @@ Or on-the-fly compression (this only applies to tables). You can turn off file c
13431343

13441344
- ``ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5``
13451345

1346-
Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow you to reuse previously deleted space (alternatively, one can simply remove the file and write again).
1346+
Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow you to reuse previously deleted space. Alternatively, one can simply remove the file and write again, or use the ``copy`` method.
13471347

13481348
Notes & Caveats
13491349
~~~~~~~~~~~~~~~
@@ -1367,9 +1367,7 @@ Notes & Caveats
13671367
Compatibility
13681368
~~~~~~~~~~~~~
13691369

1370-
0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas
1371-
however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire
1372-
file and write it out using the new format to take advantage of the updates. The group attribute ``pandas_version`` contains the version information.
1370+
0.10.1 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas; however, query terms using the prior (undocumented) methodology are unsupported. ``HDFStore`` will issue a warning if you try to use a prior-version format file. You must read in the entire file and write it out using the new format, using the method ``copy``, to take advantage of the updates. The group attribute ``pandas_version`` contains the version information. ``copy`` takes a number of options; please see the docstring.
13731371

13741372

13751373
.. ipython:: python
@@ -1380,7 +1378,7 @@ file and write it out using the new format to take advantage of the updates. The
13801378
legacy_store
13811379

13821380
# copy (and return the new handle)
1383-
new_store = legacy_store.copy_to('store_new.h5')
1381+
new_store = legacy_store.copy('store_new.h5')
13841382
new_store
13851383
new_store.close()
13861384

pandas/io/pytables.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -630,15 +630,45 @@ def get_table(self, key):
630630
t.infer_axes()
631631
return t
632632

633-
def copy_to(self, file):
634-
""" copy the existing store to a new file, upgrading in place """
635-
new_store = HDFStore(file, mode = 'w')
636-
for k, g in self.iteritems():
637-
data = self.select(k)
638-
if _is_table_type(g):
639-
new_store.append(k,data)
640-
else:
641-
new_store.put(k,data)
633+
def is_table(self, key):
    """ report whether the object stored under key is a table

        Raises
        ------
        KeyError if there is no node for key in the file
        """
    node = self.get_node(key)
    if node is not None:
        return _is_table_type(node)
    raise KeyError('No object named %s in the file' % key)
639+
640+
def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, fletcher32 = False):
    """ copy the existing store to a new file, upgrading in place

        Each selected object is read with ``select`` and re-written to the new
        store: tables via ``append``, everything else via ``put``.

        Parameters
        ----------
        file : path of the new store (handed to HDFStore)
        propindexes : restore indexes in copied file (defaults to True)
        keys : key, or list/tuple of keys, to include in the copy (defaults to all);
               a key that has no node in this store is skipped
        mode, complib, complevel, fletcher32 same as in HDFStore.__init__

        Returns
        -------
        open file handle of the new store

        """
    new_store = HDFStore(file, mode = mode, complib = complib, complevel = complevel, fletcher32 = fletcher32)
    try:
        if keys is None:
            keys = self.keys()
        if not isinstance(keys, (tuple,list)):
            keys = [ keys ]
        for k in keys:
            n = self.get_node(k)
            if n is None:
                continue
            data = self.select(k)
            if _is_table_type(n):
                t = self.get_table(k)
                # index=False unless the caller asked to carry over the indexed axes
                index = False
                if propindexes:
                    index = [ a.name for a in t.axes if a.is_indexed ]
                new_store.append(k, data, index=index, data_columns=getattr(t,'data_columns',None))
            else:
                new_store.put(k, data)
    except BaseException:
        # do not leak the freshly opened handle when the copy fails part-way
        new_store.close()
        raise
    return new_store
643673

644674
###### private methods ######
@@ -1131,6 +1161,14 @@ def __eq__(self, other):
11311161
def __ne__(self, other):
11321162
return not self.__eq__(other)
11331163

1164+
@property
def is_indexed(self):
    """ return whether I am an indexed column

        Reads the ``is_indexed`` flag of the column named ``self.cname`` on
        ``self.table.cols``; returns False when that lookup fails.
        """
    try:
        return getattr(self.table.cols,self.cname).is_indexed
    except Exception:
        # the failure branch must return a real boolean (falling through gives None)
        return False
1171+
11341172
def copy(self):
11351173
new_self = copy.copy(self)
11361174
return new_self
@@ -1543,6 +1581,13 @@ def __repr__(self):
15431581

15441582
__str__ = __repr__
15451583

1584+
def __getitem__(self, c):
    """ look up an axis by name; gives the first axis whose name equals c, or None if there is none """
    matching = (axis for axis in self.axes if c == axis.name)
    return next(matching, None)
1590+
15461591
def copy(self):
15471592
new_self = copy.copy(self)
15481593
return new_self

pandas/io/tests/test_pytables.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1735,25 +1735,59 @@ def test_legacy_0_10_read(self):
17351735
store.select(k)
17361736
store.close()
17371737

1738-
def test_legacy_copy_to(self):
1738+
def test_copy(self):
17391739
pth = curpath()
1740+
def do_copy(f = None, keys = None, propindexes = True, **kwargs):
    """ copy the store at f (default: the legacy 0.10 file under pth) to a
        temporary file with HDFStore.copy, then compare keys, row counts and
        indexed axes of the copy against the original; the temporary file is
        removed afterwards """
    import os
    import tempfile

    # pre-bind so the cleanup below is safe even when setup fails early
    store = tstore = tmp = None
    try:
        if f is None:
            f = os.path.join(pth, 'legacy_0.10.h5')

        store = HDFStore(f, 'r')

        # mkstemp returns an open descriptor as well as the path; only the
        # path is needed, so close the descriptor instead of leaking it
        fd, tmp = tempfile.mkstemp()
        os.close(fd)
        tstore = store.copy(tmp, keys = keys, propindexes = propindexes, **kwargs)

        # check keys
        if keys is None:
            keys = store.keys()
        self.assert_(set(keys) == set(tstore.keys()))

        # check indices & nrows
        for k in tstore.keys():
            if tstore.is_table(k):
                new_t = tstore.get_table(k)
                orig_t = store.get_table(k)

                self.assert_(orig_t.nrows == new_t.nrows)
                for a in orig_t.axes:
                    if a.is_indexed:
                        self.assert_(new_t[a.name].is_indexed == True)

    except Exception:
        # NOTE(review): kept best-effort as before, so assertion failures
        # raised above are still swallowed here -- confirm whether that is
        # intended before relying on this test to catch regressions
        pass
    finally:
        if store is not None:
            store.close()
        if tstore is not None:
            tstore.close()
        if tmp is not None and os.path.exists(tmp):
            os.remove(tmp)
1775+
1776+
do_copy()
1777+
do_copy(keys = ['df'])
1778+
do_copy(propindexes = False)
1779+
1780+
# new table
1781+
df = tm.makeDataFrame()
17401782
try:
1741-
import os
1742-
store = HDFStore(os.path.join(pth, 'legacy_0.10.h5'), 'r')
1743-
import tempfile
1744-
tmp = tempfile.mkstemp()[1]
1745-
tstore = store.copy_to(tmp)
1746-
1747-
# the tmp store
1748-
for k in tstore.keys():
1749-
self.assert_(k in store)
1750-
except:
1751-
pass
1783+
st = HDFStore(self.scratchpath)
1784+
st.append('df', df, data_columns = ['A'])
1785+
st.close()
1786+
do_copy(f = self.scratchpath)
1787+
do_copy(f = self.scratchpath, propindexes = False)
17521788
finally:
1753-
store.close()
1754-
tstore.close()
17551789
import os
1756-
os.remove(tmp)
1790+
os.remove(self.scratchpath)
17571791

17581792
def test_legacy_table_write(self):
17591793
raise nose.SkipTest

0 commit comments

Comments
 (0)