diff --git a/doc/source/release.rst b/doc/source/release.rst index 17fe4be734f4d..59ff48887269e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -85,7 +85,7 @@ Experimental Features (:issue:`4897`). - Add msgpack support via ``pd.read_msgpack()`` and ``pd.to_msgpack()`` / ``df.to_msgpack()`` for serialization of arbitrary pandas (and python - objects) in a lightweight portable binary format (:issue:`686`) + objects) in a lightweight portable binary format (:issue:`686`, :issue:`5506`) - Added PySide support for the qtpandas DataFrameModel and DataFrameWidget. - Added :mod:`pandas.io.gbq` for reading from (and writing to) Google BigQuery into a DataFrame. (:issue:`4140`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba2ba1b482dee..efa083e239f63 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -842,7 +842,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) - def to_msgpack(self, path_or_buf, **kwargs): + def to_msgpack(self, path_or_buf=None, **kwargs): """ msgpack (serialize) object to input file path diff --git a/pandas/io/packers.py b/pandas/io/packers.py index adb70a92b8a54..08299738f31a2 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -100,13 +100,14 @@ def to_msgpack(path_or_buf, *args, **kwargs): def writer(fh): for a in args: fh.write(pack(a, **kwargs)) - return fh if isinstance(path_or_buf, compat.string_types): with open(path_or_buf, mode) as fh: writer(fh) elif path_or_buf is None: - return writer(compat.BytesIO()) + buf = compat.BytesIO() + writer(buf) + return buf.getvalue() else: writer(path_or_buf) @@ -263,17 +264,23 @@ def encode(obj): return {'typ': 'period_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), - 'freq': obj.freqstr, + 'freq': getattr(obj,'freqstr',None), 'dtype': obj.dtype.num, 'data': convert(obj.asi8)} elif isinstance(obj, DatetimeIndex): + tz = getattr(obj,'tz',None) + + # store tz info and data as UTC + if tz is not None: + tz = tz.zone + obj = obj.tz_convert('UTC') return {'typ': 'datetime_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), 'dtype': obj.dtype.num, 'data': convert(obj.asi8), - 'freq': obj.freqstr, - 'tz': obj.tz} + 'freq': getattr(obj,'freqstr',None), + 'tz': tz } elif isinstance(obj, MultiIndex): return {'typ': 'multi_index', 'klass': obj.__class__.__name__, @@ -440,7 +447,13 @@ def decode(obj): return globals()[obj['klass']](data, name=obj['name'], freq=obj['freq']) elif typ == 'datetime_index': data = unconvert(obj['data'], np.int64, obj.get('compress')) - return globals()[obj['klass']](data, freq=obj['freq'], tz=obj['tz'], name=obj['name']) + result = globals()[obj['klass']](data, freq=obj['freq'], name=obj['name']) + tz = obj['tz'] + + # reverse tz conversion + if tz is not None: + result = result.tz_localize('UTC').tz_convert(tz) + return result elif typ == 'series': dtype = dtype_for(obj['dtype']) index = obj['index'] diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index e5938ecf87b68..6b986fa87ccce 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -61,18 +61,26 @@ def test_string_io(self): df = DataFrame(np.random.randn(10,2)) s = df.to_msgpack(None) - result = read_msgpack(s.getvalue()) + result = read_msgpack(s) + tm.assert_frame_equal(result,df) + + s = df.to_msgpack() + result = read_msgpack(s) + tm.assert_frame_equal(result,df) + + s = df.to_msgpack() + result = read_msgpack(compat.BytesIO(s)) tm.assert_frame_equal(result,df) s = to_msgpack(None,df) - result = read_msgpack(s.getvalue()) + result = read_msgpack(s) tm.assert_frame_equal(result, df) with ensure_clean(self.path) as p: - s = df.to_msgpack(None) + s = df.to_msgpack() fh = open(p,'wb') - fh.write(s.getvalue()) + fh.write(s) fh.close() result = read_msgpack(p) tm.assert_frame_equal(result, df) @@ -80,10 +88,6 @@ def test_string_io(self): def test_iterator_with_string_io(self): dfs = [ DataFrame(np.random.randn(10,2)) for i in range(5) ] - s = to_msgpack(None,*dfs) - for i, result in enumerate(read_msgpack(s.getvalue(),iterator=True)): - tm.assert_frame_equal(result,dfs[i]) - s = to_msgpack(None,*dfs) for i, result in enumerate(read_msgpack(s,iterator=True)): tm.assert_frame_equal(result,dfs[i]) @@ -98,7 +102,7 @@ def test_numpy_scalar_float(self): def test_numpy_scalar_complex(self): x = np.complex64(np.random.rand() + 1j * np.random.rand()) x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(np.allclose(x, x_rec)) def test_scalar_float(self): x = np.random.rand() @@ -108,10 +112,9 @@ def test_scalar_float(self): def test_scalar_complex(self): x = np.random.rand() + 1j * np.random.rand() x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(np.allclose(x, x_rec)) def test_list_numpy_float(self): - raise nose.SkipTest('buggy test') x = [np.float32(np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) tm.assert_almost_equal(x,x_rec) @@ -120,13 +123,11 @@ def test_list_numpy_float_complex(self): if not hasattr(np, 'complex128'): raise nose.SkipTest('numpy cant handle complex128') - # buggy test - raise nose.SkipTest('buggy test') x = [np.float32(np.random.rand()) for i in range(5)] + \ [np.complex128(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(np.allclose(x, x_rec)) def test_list_float(self): x = [np.random.rand() for i in range(5)] @@ -137,7 +138,7 @@ def test_list_float_complex(self): x = [np.random.rand() for i in range(5)] + \ [(np.random.rand() + 1j * np.random.rand()) for i in range(5)] x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(np.allclose(x, x_rec)) def test_dict_float(self): x = {'foo': 1.0, 'bar': 2.0} @@ -147,7 +148,8 @@ def test_dict_float(self): def test_dict_complex(self): x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j} x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) def test_dict_numpy_float(self): x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} @@ -158,7 +160,9 @@ def test_dict_numpy_complex(self): x = {'foo': np.complex128( 1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)} x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) + def test_numpy_array_float(self): @@ -173,7 +177,8 @@ def test_numpy_array_float(self): def test_numpy_array_complex(self): x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) x_rec = self.encode_decode(x) - tm.assert_almost_equal(x,x_rec) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + x.dtype == x_rec.dtype) def test_list_mixed(self): x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')] @@ -235,6 +240,16 @@ def test_basic_index(self): i_rec = self.encode_decode(i) self.assert_(i.equals(i_rec)) + # datetime with no freq (GH5506) + i = Index([Timestamp('20130101'),Timestamp('20130103')]) + i_rec = self.encode_decode(i) + self.assert_(i.equals(i_rec)) + + # datetime with timezone + i = Index([Timestamp('20130101 9:00:00'),Timestamp('20130103 11:00:00')]).tz_localize('US/Eastern') + i_rec = self.encode_decode(i) + self.assert_(i.equals(i_rec)) + def test_multi_index(self): for s, i in self.mi.items():