Skip to content

Add sort_keys parameter to Packer for stable dict packing. #164

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 30 additions & 11 deletions msgpack/_packer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ cdef class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode.
:param bool sort_keys:
Sort output dictionaries by key. (default: False)
"""
cdef msgpack_packer pk
cdef object _default
Expand All @@ -72,6 +74,7 @@ cdef class Packer(object):
cdef char *unicode_errors
cdef bool use_float
cdef bint autoreset
cdef bool sort_keys

def __cinit__(self):
cdef int buf_size = 1024*1024
Expand All @@ -82,12 +85,14 @@ cdef class Packer(object):
self.pk.length = 0

def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
use_single_float=False, bint autoreset=1, bint use_bin_type=0):
use_single_float=False, bint autoreset=1, bint use_bin_type=0,
sort_keys=False):
"""
"""
self.use_float = use_single_float
self.autoreset = autoreset
self.pk.use_bin_type = use_bin_type
self.sort_keys = sort_keys
if default is not None:
if not PyCallable_Check(default):
raise TypeError("default must be a callable.")
Expand Down Expand Up @@ -186,22 +191,36 @@ cdef class Packer(object):
raise ValueError("dict is too large")
ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in d.iteritems():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
if self.sort_keys:
for k in sorted(d.keys()):
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(d[k], nest_limit-1)
if ret != 0: break
else:
for k, v in d.iteritems():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif PyDict_Check(o):
L = len(o)
if L > (2**32)-1:
raise ValueError("dict is too large")
ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
if self.sort_keys:
for k in sorted(o.keys()):
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(o[k], nest_limit-1)
if ret != 0: break
else:
for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif isinstance(o, ExtType):
# This should be before Tuple because ExtType is namedtuple.
longval = o.code
Expand Down
13 changes: 10 additions & 3 deletions msgpack/fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,12 +609,16 @@ class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode.
:param bool sort_keys:
Sort output dictionaries by key. (default: False)
"""
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
use_single_float=False, autoreset=True, use_bin_type=False):
use_single_float=False, autoreset=True, use_bin_type=False,
sort_keys=False):
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
self._sort_keys = sort_keys
self._encoding = encoding
self._unicode_errors = unicode_errors
self._buffer = StringIO()
Expand Down Expand Up @@ -726,8 +730,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
self._pack(obj[i], nest_limit - 1)
return
if isinstance(obj, dict):
return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
nest_limit - 1)
if self._sort_keys:
pairs = sorted(dict_iteritems(obj))
else:
pairs = dict_iteritems(obj)
return self._fb_pack_map_pairs(len(obj), pairs, nest_limit - 1)
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = 1
Expand Down
6 changes: 6 additions & 0 deletions test/test_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def testMapSize(sizes=[0, 5, 50, 1000]):
for size in sizes:
assert unpacker.unpack() == dict((i, i * 2) for i in range(size))

def testSortKeys(sizes=[3, 31, 127, 1023]):
    """Check that ``sort_keys=True`` makes packing independent of dict order.

    For each entry in ``sizes``, two dicts holding the same integer keys are
    built with opposite insertion orders; packing both with
    ``sort_keys=True`` must produce identical output.
    """
    for size in sizes:
        # Spread roughly ``size`` integer keys across a wide value range.
        keys = range(1, 1000000000, 1000000000 // size)
        # Build the dicts with dict(generator) rather than a dict
        # comprehension: dict comprehensions are a SyntaxError on Python 2.6,
        # which this test module still targets.
        map1 = dict((k, k) for k in keys)
        map2 = dict((k, k) for k in reversed(keys))
        assert packb(map1, sort_keys=True) == packb(map2, sort_keys=True)

class odict(dict):
'''Reimplement OrderedDict to run test on Python 2.6'''
Expand Down