Commit 42f42c4

Merge branch 'json-default-handler' of https://github.com/Komnomnomnom/pandas into Komnomnomnom-json-default-handler
2 parents dd710b5 + a3997c3 commit 42f42c4

File tree

7 files changed: +186 -32 lines changed


doc/source/io.rst

Lines changed: 44 additions & 1 deletion
@@ -1054,8 +1054,9 @@ with optional parameters:
 - ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10.
 - ``force_ascii`` : force encoded string to be ASCII, default True.
 - ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'.
+- ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serialisable object.
 
-Note NaN's, NaT's and None will be converted to null and datetime objects will be converted based on the date_format and date_unit parameters.
+Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters.
 
 .. ipython:: python
 
@@ -1098,6 +1099,48 @@ Writing to a file, with a date index and a date column
   dfj2.to_json('test.json')
   open('test.json').read()
 
+If the JSON serialiser cannot handle the container contents directly it will fallback in the following manner:
+
+- if a ``toDict`` method is defined by the unrecognised object then that
+  will be called and its returned ``dict`` will be JSON serialised.
+- if a ``default_handler`` has been passed to ``to_json`` that will
+  be called to convert the object.
+- otherwise an attempt is made to convert the object to a ``dict`` by
+  parsing its contents. However if the object is complex this will often fail
+  with an ``OverflowError``.
+
+Your best bet when encountering ``OverflowError`` during serialisation
+is to specify a ``default_handler``. For example ``timedelta`` can cause
+problems:
+
+.. ipython:: python
+   :suppress:
+
+   from datetime import timedelta
+   dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])
+
+.. code-block:: ipython
+
+   In [141]: from datetime import timedelta
+
+   In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])
+
+   In [143]: dftd.to_json()
+
+   ---------------------------------------------------------------------------
+   OverflowError                             Traceback (most recent call last)
+   OverflowError: Maximum recursion level reached
+
+which can be dealt with by specifying a simple ``default_handler``:
+
+.. ipython:: python
+
+   dftd.to_json(default_handler=str)
+
+   def my_handler(obj):
+       return obj.total_seconds()
+   dftd.to_json(default_handler=my_handler)
+
 Reading JSON
 ~~~~~~~~~~~~
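Beyond the ``timedelta`` case above, the fallback order documented in this hunk can be illustrated with a small, hedged sketch; the ``Point`` class and ``my_handler`` function below are invented for illustration and are not part of the commit.

# Rough sketch of the documented fallback order; Point and my_handler are
# hypothetical names used only for illustration.
import pandas as pd
from datetime import timedelta


class Point(object):
    def __init__(self, x, y):
        self.x, self.y = x, y

    # step 1: an unrecognised object exposing toDict is encoded from the
    # dict that toDict returns
    def toDict(self):
        return {"x": self.x, "y": self.y}


def my_handler(obj):
    # step 2: called for unrecognised objects without toDict; must return
    # something JSON-serialisable
    return str(obj)


print(pd.DataFrame({"pt": [Point(1, 2)]}).to_json())           # via toDict
dftd = pd.DataFrame([timedelta(23), timedelta(seconds=5), 42])
print(dftd.to_json(default_handler=my_handler))                # via the handler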

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
@@ -234,6 +234,8 @@ API Changes
 
   - added ``date_unit`` parameter to specify resolution of timestamps. Options
     are seconds, milliseconds, microseconds and nanoseconds. (:issue:`4362`, :issue:`4498`).
+  - added ``default_handler`` parameter to allow a callable to be passed which will be
+    responsible for handling otherwise unserialisable objects. (:issue:`5138`)
 
   - ``Index`` and ``MultiIndex`` changes (:issue:`4039`):
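As a quick illustration of the two new ``to_json`` keywords mentioned in these entries (``date_unit`` and ``default_handler``), a hedged sketch with invented data:

# Invented data; only the keyword arguments come from the entries above.
import pandas as pd
from datetime import datetime, timedelta

dfj = pd.DataFrame({"when": [datetime(2013, 10, 15), datetime(2013, 10, 16)]})
print(dfj.to_json(date_format="iso", date_unit="s"))  # second resolution

dftd = pd.DataFrame([timedelta(23), timedelta(seconds=5), 42])
print(dftd.to_json(default_handler=str))  # handler for unserialisable objects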

pandas/core/generic.py

Lines changed: 16 additions & 7 deletions
@@ -707,7 +707,8 @@ def __setstate__(self, state):
     # I/O Methods
 
     def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
-                double_precision=10, force_ascii=True, date_unit='ms'):
+                double_precision=10, force_ascii=True, date_unit='ms',
+                default_handler=None):
         """
         Convert the object to a JSON string.
 
@@ -728,25 +729,32 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
           * DataFrame
 
             - default is 'columns'
-            - allowed values are: {'split','records','index','columns','values'}
+            - allowed values are:
+              {'split','records','index','columns','values'}
 
           * The format of the JSON string
 
-            - split : dict like {index -> [index], columns -> [columns], data -> [values]}
-            - records : list like [{column -> value}, ... , {column -> value}]
+            - split : dict like
+              {index -> [index], columns -> [columns], data -> [values]}
+            - records : list like
+              [{column -> value}, ... , {column -> value}]
             - index : dict like {index -> {column -> value}}
             - columns : dict like {column -> {index -> value}}
            - values : just the values array
-        date_format : type of date conversion (epoch = epoch milliseconds, iso = ISO8601)
-            default is epoch
+        date_format : type of date conversion, epoch or iso
+            epoch = epoch milliseconds, iso = ISO8601, default is epoch
         double_precision : The number of decimal places to use when encoding
             floating point values, default 10.
         force_ascii : force encoded string to be ASCII, default True.
         date_unit : string, default 'ms' (milliseconds)
             The time unit to encode to, governs timestamp and ISO8601
             precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
             microsecond, and nanosecond respectively.
+        default_handler : callable, default None
+            Handler to call if object cannot otherwise be converted to a
+            suitable format for JSON. Should receive a single argument which is
+            the object to convert and return a serialisable object.
 
         Returns
         -------
@@ -761,7 +769,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch',
                             date_format=date_format,
                             double_precision=double_precision,
                             force_ascii=force_ascii,
-                            date_unit=date_unit)
+                            date_unit=date_unit,
+                            default_handler=default_handler)
 
     def to_hdf(self, path_or_buf, key, **kwargs):
         """ activate the HDFStore

pandas/io/json.py

Lines changed: 31 additions & 21 deletions
@@ -17,19 +17,21 @@
 dumps = _json.dumps
 
 ### interface to/from ###
 
+
 def to_json(path_or_buf, obj, orient=None, date_format='epoch',
-            double_precision=10, force_ascii=True, date_unit='ms'):
+            double_precision=10, force_ascii=True, date_unit='ms',
+            default_handler=None):
 
     if isinstance(obj, Series):
         s = SeriesWriter(
             obj, orient=orient, date_format=date_format,
             double_precision=double_precision, ensure_ascii=force_ascii,
-            date_unit=date_unit).write()
+            date_unit=date_unit, default_handler=default_handler).write()
     elif isinstance(obj, DataFrame):
         s = FrameWriter(
             obj, orient=orient, date_format=date_format,
             double_precision=double_precision, ensure_ascii=force_ascii,
-            date_unit=date_unit).write()
+            date_unit=date_unit, default_handler=default_handler).write()
     else:
         raise NotImplementedError
 
@@ -45,7 +47,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
 class Writer(object):
 
     def __init__(self, obj, orient, date_format, double_precision,
-                 ensure_ascii, date_unit):
+                 ensure_ascii, date_unit, default_handler=None):
         self.obj = obj
 
         if orient is None:
@@ -56,6 +58,7 @@ def __init__(self, obj, orient, date_format, double_precision,
         self.double_precision = double_precision
         self.ensure_ascii = ensure_ascii
         self.date_unit = date_unit
+        self.default_handler = default_handler
 
         self.is_copy = False
         self._format_axes()
@@ -70,7 +73,9 @@ def write(self):
                      double_precision=self.double_precision,
                      ensure_ascii=self.ensure_ascii,
                      date_unit=self.date_unit,
-                     iso_dates=self.date_format == 'iso')
+                     iso_dates=self.date_format == 'iso',
+                     default_handler=self.default_handler)
+
 
 class SeriesWriter(Writer):
     _default_orient = 'index'
@@ -121,13 +126,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
           - default is ``'columns'``
           - allowed values are: {'split','records','index','columns','values'}
-          - The DataFrame index must be unique for orients 'index' and 'columns'.
-          - The DataFrame columns must be unique for orients 'index', 'columns', and 'records'.
+          - The DataFrame index must be unique for orients 'index' and
+            'columns'.
+          - The DataFrame columns must be unique for orients 'index',
+            'columns', and 'records'.
 
         * The format of the JSON string
 
-          - split : dict like ``{index -> [index], columns -> [columns], data -> [values]}``
-          - records : list like ``[{column -> value}, ... , {column -> value}]``
+          - split : dict like
+            ``{index -> [index], columns -> [columns], data -> [values]}``
+          - records : list like
+            ``[{column -> value}, ... , {column -> value}]``
           - index : dict like ``{index -> {column -> value}}``
          - columns : dict like ``{column -> {index -> value}}``
          - values : just the values array
@@ -384,7 +393,6 @@ class SeriesParser(Parser):
     _default_orient = 'index'
     _split_keys = ('name', 'index', 'data')
 
-
     def _parse_no_numpy(self):
 
         json = self.json
@@ -542,7 +550,7 @@ def is_ok(col):
 #----------------------------------------------------------------------
 # JSON normalization routines
 
-def nested_to_record(ds,prefix="",level=0):
+def nested_to_record(ds, prefix="", level=0):
     """a simplified json_normalize
 
     converts a nested dict into a flat dict ("record"), unlike json_normalize,
@@ -557,7 +565,8 @@ def nested_to_record(ds, prefix="", level=0):
     d - dict or list of dicts, matching `ds`
 
     Example:
-    IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),nested=dict(e=dict(c=1,d=2),d=2)))
+    IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
+                             nested=dict(e=dict(c=1,d=2),d=2)))
     Out[52]:
     {'dict1.c': 1,
      'dict1.d': 2,
@@ -567,31 +576,31 @@ def nested_to_record(ds, prefix="", level=0):
      'nested.e.d': 2}
     """
     singleton = False
-    if isinstance(ds,dict):
+    if isinstance(ds, dict):
         ds = [ds]
         singleton = True
 
     new_ds = []
     for d in ds:
 
         new_d = copy.deepcopy(d)
-        for k,v in d.items():
+        for k, v in d.items():
             # each key gets renamed with prefix
             if level == 0:
                 newkey = str(k)
             else:
-                newkey = prefix+'.'+ str(k)
+                newkey = prefix + '.' + str(k)
 
             # only dicts gets recurse-flattend
             # only at level>1 do we rename the rest of the keys
-            if not isinstance(v,dict):
-                if level!=0: # so we skip copying for top level, common case
+            if not isinstance(v, dict):
+                if level != 0: # so we skip copying for top level, common case
                     v = new_d.pop(k)
-                    new_d[newkey]= v
+                    new_d[newkey] = v
                 continue
             else:
                 v = new_d.pop(k)
-                new_d.update(nested_to_record(v,newkey,level+1))
+                new_d.update(nested_to_record(v, newkey, level+1))
         new_ds.append(new_d)
 
     if singleton:
@@ -663,13 +672,14 @@ def _pull_field(js, spec):
         data = [data]
 
     if record_path is None:
-        if any([isinstance(x,dict) for x in compat.itervalues(data[0])]):
+        if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
             #  {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@}
             #
-            # TODO: handle record value which are lists, at least error reasonabley
+            # TODO: handle record value which are lists, at least error
+            # reasonably
             data = nested_to_record(data)
             return DataFrame(data)
     elif not isinstance(record_path, list):
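The ``nested_to_record`` helper tidied above is internal, but its docstring example translates directly into a small usage sketch; the import path is assumed to match the pandas/io/json.py module shown above.

# Uses the example from nested_to_record's own docstring; import path assumed.
from pandas.io.json import nested_to_record

flat = nested_to_record(dict(flat1=1,
                             dict1=dict(c=1, d=2),
                             nested=dict(e=dict(c=1, d=2), d=2)))
# per the docstring, nested keys are flattened to dotted names, e.g.
# 'dict1.c', 'dict1.d', 'nested.e.c', 'nested.e.d', 'nested.d'
print(flat)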

pandas/io/tests/test_json/test_pandas.py

Lines changed: 13 additions & 0 deletions
@@ -575,3 +575,16 @@ def test_url(self):
 
         url = 'http://search.twitter.com/search.json?q=pandas%20python'
         result = read_json(url)
+
+    def test_default_handler(self):
+        from datetime import timedelta
+        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
+        self.assertRaises(OverflowError, frame.to_json)
+        expected = DataFrame([str(timedelta(23)), str(timedelta(seconds=5))])
+        assert_frame_equal(
+            expected, pd.read_json(frame.to_json(default_handler=str)))
+
+        def my_handler_raises(obj):
+            raise TypeError
+        self.assertRaises(
+            TypeError, frame.to_json, default_handler=my_handler_raises)

pandas/io/tests/test_json/test_ujson.py

Lines changed: 45 additions & 0 deletions
@@ -848,6 +848,51 @@ def toDict(self):
         dec = ujson.decode(output)
         self.assertEquals(dec, d)
 
+    def test_defaultHandler(self):
+
+        class _TestObject(object):
+
+            def __init__(self, val):
+                self.val = val
+
+            @property
+            def recursive_attr(self):
+                return _TestObject("recursive_attr")
+
+            def __str__(self):
+                return str(self.val)
+
+        self.assertRaises(OverflowError, ujson.encode, _TestObject("foo"))
+        self.assertEquals('"foo"', ujson.encode(_TestObject("foo"),
+                                                default_handler=str))
+
+        def my_handler(obj):
+            return "foobar"
+        self.assertEquals('"foobar"', ujson.encode(_TestObject("foo"),
+                                                   default_handler=my_handler))
+
+        def my_handler_raises(obj):
+            raise TypeError("I raise for anything")
+        with tm.assertRaisesRegexp(TypeError, "I raise for anything"):
+            ujson.encode(_TestObject("foo"), default_handler=my_handler_raises)
+
+        def my_int_handler(obj):
+            return 42
+        self.assertEquals(
+            42, ujson.decode(ujson.encode(_TestObject("foo"),
+                                          default_handler=my_int_handler)))
+
+        def my_obj_handler(obj):
+            return datetime.datetime(2013, 2, 3)
+        self.assertEquals(
+            ujson.decode(ujson.encode(datetime.datetime(2013, 2, 3))),
+            ujson.decode(ujson.encode(_TestObject("foo"),
+                                      default_handler=my_obj_handler)))
+
+        l = [_TestObject("foo"), _TestObject("bar")]
+        self.assertEquals(json.loads(json.dumps(l, default=str)),
+                          ujson.decode(ujson.encode(l, default_handler=str)))
+
 
 class NumpyJSONTests(TestCase):
