Skip to content

Commit d94ff14

Browse files
committed
Merge pull request #2933 from jreback/pytables_2931
BUG: fixes issue in HDFStore w.r.t. compressed empty sparse series (GH #2931)
2 parents eb3134f + 65f2882 commit d94ff14

File tree

2 files changed

+54
-12
lines changed

2 files changed

+54
-12
lines changed

pandas/io/pytables.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1596,6 +1596,16 @@ def read_index_node(self, node):
15961596

15971597
return name, index
15981598

1599+
1600+
def write_array_empty(self, key, value):
    """Write a 0-len array under ``key`` using a placeholder node.

    A placeholder with one element per axis of ``value`` is stored in
    the group; the dtype (as a string) and the shape of ``value`` are
    recorded on the node as the ``value_type`` and ``shape`` attributes.
    """
    # ugly hack for length 0 axes: store a single element per dimension
    placeholder_shape = (1,) * value.ndim
    placeholder = np.empty(placeholder_shape)
    self._handle.createArray(self.group, key, placeholder)

    # remember what the real (empty) array looked like
    node_attrs = getattr(self.group, key)._v_attrs
    node_attrs.value_type = str(value.dtype)
    node_attrs.shape = value.shape
1608+
15991609
def write_array(self, key, value):
16001610
if key in self.group:
16011611
self._handle.removeNode(self.group, key)
@@ -1618,11 +1628,16 @@ def write_array(self, key, value):
16181628

16191629
if atom is not None:
16201630
# create an empty chunked array and fill it from value
1621-
ca = self._handle.createCArray(self.group, key, atom,
1622-
value.shape,
1623-
filters=self._filters)
1624-
ca[:] = value
1625-
getattr(self.group, key)._v_attrs.transposed = transposed
1631+
if not empty_array:
1632+
ca = self._handle.createCArray(self.group, key, atom,
1633+
value.shape,
1634+
filters=self._filters)
1635+
ca[:] = value
1636+
getattr(self.group, key)._v_attrs.transposed = transposed
1637+
1638+
else:
1639+
self.write_array_empty(key, value)
1640+
16261641
return
16271642

16281643
if value.dtype.type == np.object_:
@@ -1645,11 +1660,7 @@ def write_array(self, key, value):
16451660
getattr(self.group, key)._v_attrs.value_type = 'datetime64'
16461661
else:
16471662
if empty_array:
1648-
# ugly hack for length 0 axes
1649-
arr = np.empty((1,) * value.ndim)
1650-
self._handle.createArray(self.group, key, arr)
1651-
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
1652-
getattr(self.group, key)._v_attrs.shape = value.shape
1663+
self.write_array_empty(key, value)
16531664
else:
16541665
self._handle.createArray(self.group, key, value)
16551666

@@ -1720,7 +1731,7 @@ def write(self, obj, **kwargs):
17201731
self.write_index('sp_index', obj.sp_index)
17211732
self.write_array('sp_values', obj.sp_values)
17221733
self.attrs.name = obj.name
1723-
self.attrs.dill_value = obj.fill_value
1734+
self.attrs.fill_value = obj.fill_value
17241735
self.attrs.kind = obj.kind
17251736

17261737
class SparseFrameStorer(GenericStorer):

pandas/io/tests/test_pytables.py

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1282,6 +1282,7 @@ def test_sparse_frame(self):
12821282
s.ix[3:5, 1:3] = np.nan
12831283
s.ix[8:10, -2] = np.nan
12841284
ss = s.to_sparse()
1285+
12851286
self._check_double_roundtrip(ss, tm.assert_frame_equal,
12861287
check_frame_type=True)
12871288

@@ -1565,6 +1566,36 @@ def test_overwrite_node(self):
15651566

15661567
tm.assert_series_equal(store['a'], ts)
15671568

1569+
def test_sparse_with_compression(self):
    """Round-trip sparse frames through the store, with and without zlib.

    Regression test for GH 2931: a sparse frame that contains a
    completely sparse series must round-trip when compression is on.
    """
    # GH 2931

    # array dimensions must be integers; a float such as 1e3 is not a
    # valid shape for numpy
    nrows = 1000

    # make sparse dataframe
    values = np.random.binomial(n=1, p=.01, size=(nrows, 10))
    df = DataFrame(values).to_sparse(fill_value=0)

    # case 1: store uncompressed
    self._check_double_roundtrip(df, tm.assert_frame_equal,
                                 compression=False,
                                 check_frame_type=True)

    # case 2: store compressed
    self._check_double_roundtrip(df, tm.assert_frame_equal,
                                 compression='zlib',
                                 check_frame_type=True)

    # set one series to be completely sparse
    df[0] = np.zeros(nrows)

    # case 3: store df with completely sparse series uncompressed
    self._check_double_roundtrip(df, tm.assert_frame_equal,
                                 compression=False,
                                 check_frame_type=True)

    # case 4: store df with completely sparse series compressed
    # (the scenario reported as failing in GH 2931)
    self._check_double_roundtrip(df, tm.assert_frame_equal,
                                 compression='zlib',
                                 check_frame_type=True)
1598+
15681599
def test_select(self):
15691600
wp = tm.makePanel()
15701601

@@ -1967,7 +1998,7 @@ def _check_double_roundtrip(self, obj, comparator, compression=False,
19671998
**kwargs):
19681999
options = {}
19692000
if compression:
1970-
options['complib'] = _default_compressor
2001+
options['complib'] = compression or _default_compressor
19712002

19722003
with ensure_clean(self.path, 'w', **options) as store:
19732004
store['obj'] = obj

0 commit comments

Comments
 (0)