diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 0a6513c0..8b736eab 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -63,6 +63,8 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool sort_keys: + Sort output dictionaries by key. (default: False) """ cdef msgpack_packer pk cdef object _default @@ -72,6 +74,7 @@ cdef class Packer(object): cdef char *unicode_errors cdef bool use_float cdef bint autoreset + cdef bool sort_keys def __cinit__(self): cdef int buf_size = 1024*1024 @@ -82,12 +85,14 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1, bint use_bin_type=0): + use_single_float=False, bint autoreset=1, bint use_bin_type=0, + sort_keys=False): """ """ self.use_float = use_single_float self.autoreset = autoreset self.pk.use_bin_type = use_bin_type + self.sort_keys = sort_keys if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") @@ -186,22 +191,36 @@ cdef class Packer(object): raise ValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: - for k, v in d.iteritems(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - if ret != 0: break + if self.sort_keys: + for k in sorted(d.keys()): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(d[k], nest_limit-1) + if ret != 0: break + else: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break elif PyDict_Check(o): L = len(o) if L > (2**32)-1: raise ValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit-1) - if ret != 0: break - ret = self._pack(v, nest_limit-1) - 
if ret != 0: break + if self.sort_keys: + for k in sorted(o.keys()): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(o[k], nest_limit-1) + if ret != 0: break + else: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break elif isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. longval = o.code diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f682611d..58d15086 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -609,12 +609,16 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool sort_keys: + Sort output dictionaries by key. (default: False) """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, autoreset=True, use_bin_type=False): + use_single_float=False, autoreset=True, use_bin_type=False, + sort_keys=False): self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type + self._sort_keys = sort_keys self._encoding = encoding self._unicode_errors = unicode_errors self._buffer = StringIO() @@ -726,8 +730,11 @@ def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): self._pack(obj[i], nest_limit - 1) return if isinstance(obj, dict): - return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), - nest_limit - 1) + if self._sort_keys: + pairs = sorted(dict_iteritems(obj)) + else: + pairs = dict_iteritems(obj) + return self._fb_pack_map_pairs(len(obj), pairs, nest_limit - 1) if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 diff --git a/test/test_pack.py b/test/test_pack.py index 762ccf51..6061b2cf 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -128,6 +128,12 @@ def testMapSize(sizes=[0, 5, 50, 1000]): for size in sizes: assert 
unpacker.unpack() == dict((i, i * 2) for i in range(size)) +def testSortKeys(sizes=[3, 31, 127, 1023]): + for size in sizes: + keys = range(1, 1000000000, 1000000000 // size) + map1 = dict((k, k) for k in keys) + map2 = dict((k, k) for k in reversed(keys)) + assert packb(map1, sort_keys=True) == packb(map2, sort_keys=True) class odict(dict): '''Reimplement OrderedDict to run test on Python 2.6'''