From 65f2882be3fe40f8bb895e9e7d370804527e1706 Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 25 Feb 2013 20:10:07 -0500
Subject: [PATCH] BUG: pathological case on SparseSeries with 0-len array and compression (GH 2931)

---
 pandas/io/pytables.py            | 33 +++++++++++++++++++++-----------
 pandas/io/tests/test_pytables.py | 33 +++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 8067d7e0be17f..ac7ca152ffcee 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1596,6 +1596,16 @@ def read_index_node(self, node):
 
         return name, index
 
+
+    def write_array_empty(self, key, value):
+        """ write a 0-len array """
+
+        # ugly hack for length 0 axes
+        arr = np.empty((1,) * value.ndim)
+        self._handle.createArray(self.group, key, arr)
+        getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
+        getattr(self.group, key)._v_attrs.shape = value.shape
+
     def write_array(self, key, value):
         if key in self.group:
             self._handle.removeNode(self.group, key)
@@ -1618,11 +1628,16 @@ def write_array(self, key, value):
 
             if atom is not None:
                 # create an empty chunked array and fill it from value
-                ca = self._handle.createCArray(self.group, key, atom,
-                                               value.shape,
-                                               filters=self._filters)
-                ca[:] = value
-                getattr(self.group, key)._v_attrs.transposed = transposed
+                if not empty_array:
+                    ca = self._handle.createCArray(self.group, key, atom,
+                                                   value.shape,
+                                                   filters=self._filters)
+                    ca[:] = value
+                    getattr(self.group, key)._v_attrs.transposed = transposed
+
+                else:
+                    self.write_array_empty(key, value)
+
                 return
 
         if value.dtype.type == np.object_:
@@ -1645,11 +1660,7 @@ def write_array(self, key, value):
             getattr(self.group, key)._v_attrs.value_type = 'datetime64'
         else:
             if empty_array:
-                # ugly hack for length 0 axes
-                arr = np.empty((1,) * value.ndim)
-                self._handle.createArray(self.group, key, arr)
-                getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
-                getattr(self.group, key)._v_attrs.shape = value.shape
+                self.write_array_empty(key, value)
             else:
                 self._handle.createArray(self.group, key, value)
 
@@ -1720,7 +1731,7 @@ def write(self, obj, **kwargs):
         self.write_index('sp_index', obj.sp_index)
         self.write_array('sp_values', obj.sp_values)
         self.attrs.name = obj.name
-        self.attrs.dill_value = obj.fill_value
+        self.attrs.fill_value = obj.fill_value
         self.attrs.kind = obj.kind
 
 class SparseFrameStorer(GenericStorer):
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index d4654d01f1e1e..986329a615665 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1282,6 +1282,7 @@ def test_sparse_frame(self):
         s.ix[3:5, 1:3] = np.nan
         s.ix[8:10, -2] = np.nan
         ss = s.to_sparse()
+
         self._check_double_roundtrip(ss, tm.assert_frame_equal,
                                      check_frame_type=True)
 
@@ -1565,6 +1566,36 @@ def test_overwrite_node(self):
 
             tm.assert_series_equal(store['a'], ts)
 
+    def test_sparse_with_compression(self):
+
+        # GH 2931
+
+        # make sparse dataframe
+        df = DataFrame(np.random.binomial(n=1, p=.01, size=(1e3, 10))).to_sparse(fill_value=0)
+
+        # case 1: store uncompressed
+        self._check_double_roundtrip(df, tm.assert_frame_equal,
+                                     compression = False,
+                                     check_frame_type=True)
+
+        # case 2: store compressed (works)
+        self._check_double_roundtrip(df, tm.assert_frame_equal,
+                                     compression = 'zlib',
+                                     check_frame_type=True)
+
+        # set one series to be completely sparse
+        df[0] = np.zeros(1e3)
+
+        # case 3: store df with completely sparse series uncompressed
+        self._check_double_roundtrip(df, tm.assert_frame_equal,
+                                     compression = False,
+                                     check_frame_type=True)
+
+        # case 4: try storing df with completely sparse series compressed (fails)
+        self._check_double_roundtrip(df, tm.assert_frame_equal,
+                                     compression = 'zlib',
+                                     check_frame_type=True)
+
     def test_select(self):
         wp = tm.makePanel()
 
@@ -1967,7 +1998,7 @@ def _check_double_roundtrip(self, obj, comparator, compression=False,
                                 **kwargs):
         options = {}
         if compression:
-            options['complib'] = _default_compressor
+            options['complib'] = compression or _default_compressor
 
         with ensure_clean(self.path, 'w', **options) as store:
             store['obj'] = obj