Skip to content

Commit 9bcdbd9

Browse files
committed
ENH: allow msgpack to accept and return string-like objects in addition to files
1 parent cad683b commit 9bcdbd9

File tree

3 files changed

+110 -18 lines changed

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -851,8 +851,8 @@ def to_msgpack(self, path_or_buf, **kwargs):
851851
852852
Parameters
853853
----------
854-
path : string File path
855-
args : an object or objects to serialize
854+
path : string File path, buffer-like, or None
855+
if None, return generated string
856856
append : boolean whether to append to an existing msgpack
857857
(default is False)
858858
compress : type of compressor (zlib or blosc), default to None (no compression)

pandas/io/packers.py

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4141
"""
4242

43+
import os
4344
from datetime import datetime, date, timedelta
4445
from dateutil.parser import parse
4546

@@ -54,6 +55,7 @@
5455
from pandas.sparse.array import BlockIndex, IntIndex
5556
from pandas.core.generic import NDFrame
5657
from pandas.core.common import needs_i8_conversion
58+
from pandas.io.common import get_filepath_or_buffer
5759
from pandas.core.internals import BlockManager, make_block
5860
import pandas.core.internals as internals
5961

@@ -71,7 +73,7 @@
7173
compressor = None
7274

7375

74-
def to_msgpack(path, *args, **kwargs):
76+
def to_msgpack(path_or_buf, *args, **kwargs):
7577
"""
7678
msgpack (serialize) object to input file path
7779
@@ -80,7 +82,8 @@ def to_msgpack(path, *args, **kwargs):
8082
8183
Parameters
8284
----------
83-
path : string File path
85+
path_or_buf : string File path, buffer-like, or None
86+
if None, return generated string
8487
args : an object or objects to serialize
8588
append : boolean whether to append to an existing msgpack
8689
(default is False)
@@ -90,17 +93,24 @@ def to_msgpack(path, *args, **kwargs):
9093
compressor = kwargs.pop('compress', None)
9194
append = kwargs.pop('append', None)
9295
if append:
93-
f = open(path, 'a+b')
96+
mode = 'a+b'
9497
else:
95-
f = open(path, 'wb')
96-
try:
97-
for a in args:
98-
f.write(pack(a, **kwargs))
99-
finally:
100-
f.close()
98+
mode = 'wb'
10199

100+
def writer(fh):
101+
for a in args:
102+
fh.write(pack(a, **kwargs))
103+
return fh
104+
105+
if isinstance(path_or_buf, compat.string_types):
106+
with open(path_or_buf, mode) as fh:
107+
writer(fh)
108+
elif path_or_buf is None:
109+
return writer(compat.BytesIO())
110+
else:
111+
writer(path_or_buf)
102112

103-
def read_msgpack(path, iterator=False, **kwargs):
113+
def read_msgpack(path_or_buf, iterator=False, **kwargs):
104114
"""
105115
Load msgpack pandas object from the specified
106116
file path
@@ -110,8 +120,7 @@ def read_msgpack(path, iterator=False, **kwargs):
110120
111121
Parameters
112122
----------
113-
path : string
114-
File path
123+
path_or_buf : string File path, BytesIO like or string
115124
iterator : boolean, if True, return an iterator to the unpacker
116125
(default is False)
117126
@@ -120,15 +129,40 @@ def read_msgpack(path, iterator=False, **kwargs):
120129
obj : type of object stored in file
121130
122131
"""
132+
path_or_buf, _ = get_filepath_or_buffer(path_or_buf)
123133
if iterator:
124-
return Iterator(path)
134+
return Iterator(path_or_buf)
125135

126-
with open(path, 'rb') as fh:
136+
def read(fh):
127137
l = list(unpack(fh))
128138
if len(l) == 1:
129139
return l[0]
130140
return l
131141

142+
# see if we have an actual file
143+
if isinstance(path_or_buf, compat.string_types):
144+
145+
try:
146+
path_exists = os.path.exists(path_or_buf)
147+
except (TypeError):
148+
path_exists = False
149+
150+
if path_exists:
151+
with open(path_or_buf, 'rb') as fh:
152+
return read(fh)
153+
154+
# treat as a string-like
155+
if not hasattr(path_or_buf,'read'):
156+
157+
try:
158+
fh = compat.BytesIO(path_or_buf)
159+
return read(fh)
160+
finally:
161+
fh.close()
162+
163+
# a buffer like
164+
return read(path_or_buf)
165+
132166
dtype_dict = {21: np.dtype('M8[ns]'),
133167
u('datetime64[ns]'): np.dtype('M8[ns]'),
134168
u('datetime64[us]'): np.dtype('M8[us]'),
@@ -530,10 +564,36 @@ def __init__(self, path, **kwargs):
530564

531565
def __iter__(self):
532566

567+
needs_closing = True
533568
try:
534-
fh = open(self.path, 'rb')
569+
570+
# see if we have an actual file
571+
if isinstance(self.path, compat.string_types):
572+
573+
try:
574+
path_exists = os.path.exists(self.path)
575+
except (TypeError):
576+
path_exists = False
577+
578+
if path_exists:
579+
fh = open(self.path, 'rb')
580+
else:
581+
fh = compat.BytesIO(self.path)
582+
583+
else:
584+
585+
if not hasattr(self.path,'read'):
586+
fh = compat.BytesIO(self.path)
587+
588+
else:
589+
590+
# a file-like
591+
needs_closing = False
592+
fh = self.path
593+
535594
unpacker = unpack(fh)
536595
for o in unpacker:
537596
yield o
538597
finally:
539-
fh.close()
598+
if needs_closing:
599+
fh.close()

pandas/io/tests/test_packers.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,38 @@ def encode_decode(self, x, **kwargs):
5555
to_msgpack(p, x, **kwargs)
5656
return read_msgpack(p, **kwargs)
5757

58+
class TestAPI(Test):
59+
60+
def test_string_io(self):
61+
62+
df = DataFrame(np.random.randn(10,2))
63+
s = df.to_msgpack(None)
64+
result = read_msgpack(s.getvalue())
65+
tm.assert_frame_equal(result,df)
66+
67+
s = to_msgpack(None,df)
68+
result = read_msgpack(s.getvalue())
69+
tm.assert_frame_equal(result, df)
70+
71+
with ensure_clean(self.path) as p:
72+
73+
s = df.to_msgpack(None)
74+
fh = open(p,'wb')
75+
fh.write(s.getvalue())
76+
fh.close()
77+
result = read_msgpack(p)
78+
tm.assert_frame_equal(result, df)
79+
80+
def test_iterator_with_string_io(self):
81+
82+
dfs = [ DataFrame(np.random.randn(10,2)) for i in range(5) ]
83+
s = to_msgpack(None,*dfs)
84+
for i, result in enumerate(read_msgpack(s.getvalue(),iterator=True)):
85+
tm.assert_frame_equal(result,dfs[i])
86+
87+
s = to_msgpack(None,*dfs)
88+
for i, result in enumerate(read_msgpack(s,iterator=True)):
89+
tm.assert_frame_equal(result,dfs[i])
5890

5991
class TestNumpy(Test):
6092

0 commit comments

Comments
 (0)