
Commit 6422041

API: to_json now writes to a file by default (if None is provided it will return a StringIO object)
read_json will read from a string-like or filebuf or url (consistent with other parsers)
1 parent a9dafe3 commit 6422041
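
In caller terms the change looks roughly like this (a minimal sketch based on the docs and tests in this commit; the DataFrame and the 'data.json' path are illustrative assumptions, not part of the change):

    import pandas as pd
    from pandas import DataFrame
    from numpy.random import randn

    df = DataFrame(randn(10, 2), columns=list('AB'))

    # path_or_buf is now the first argument: a string path writes a file ...
    df.to_json('data.json')

    # ... and None returns a StringIO holding the JSON instead of writing
    buf = df.to_json(None)
    text = buf.getvalue()

    # read_json accepts a path, an open file handle / StringIO, or a URL,
    # or a raw JSON string via the json= keyword
    frame_from_file = pd.read_json('data.json')
    frame_from_text = pd.read_json(json=text)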

8 files changed: +122 additions, -57 deletions


doc/source/io.rst

Lines changed: 18 additions & 5 deletions
@@ -939,7 +939,6 @@ The Series object also has a ``to_string`` method, but with only the ``buf``,
 which, if set to ``True``, will additionally output the length of the Series.
 
 
-
 JSON
 ----
 
@@ -953,6 +952,8 @@ Writing JSON
 A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
 with optional parameters:
 
+- path_or_buf : the pathname or buffer to write the output
+  This can be ``None`` in which case a ``StringIO`` converted string is returned
 - orient : The format of the JSON string, default is ``index`` for ``Series``, ``columns`` for ``DataFrame``
 
   * split : dict like {index -> [index], columns -> [columns], data -> [values]}
@@ -969,8 +970,8 @@ Note NaN's and None will be converted to null and datetime objects will be conve
 .. ipython:: python
 
    df = DataFrame(randn(10, 2), columns=list('AB'))
-   s = df.to_json()
-   s
+   json = df.to_json(None)
+   json.getvalue()
 
 Reading JSON
 ~~~~~~~~~~~~
@@ -979,7 +980,11 @@ Reading a JSON string to pandas object can take a number of parameters.
 The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or
 is ``None``. To explicity force ``Series`` parsing, pass ``typ=series``
 
-- json : The JSON string to parse.
+- filepath_or_buffer : a **VALID** JSON string or file handle / StringIO. The string could be
+  a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
+  is expected. For instance, a local file could be
+  file ://localhost/path/to/table.json
+- json : a VALID JSON string, optional, used if filepath_or_buffer is not provided
 - typ : type of object to recover (series or frame), default 'frame'
 - orient : The format of the JSON string, one of the following
 
@@ -993,9 +998,17 @@ is ``None``. To explicity force ``Series`` parsing, pass ``typ=series``
 The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is
 not parsable.
 
+Reading from a JSON string
+
+.. ipython:: python
+
+   pd.read_json(json='{"0":{"0":1,"1":3},"1":{"0":2,"1":4}}')
+
+Reading from a StringIO
+
 .. ipython:: python
 
-   pd.read_json(s)
+   pd.read_json(json)
 
 HTML
 ----
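
The URL support listed above is exercised by the new test_url test; a hedged usage sketch (the GitHub issues URL is taken from that test, needs network access, and may change over time):

    import pandas as pd

    # any http/https/ftp/s3/file URL returning JSON can be passed directly
    url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5'
    issues = pd.read_json(url)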

pandas/core/generic.py

Lines changed: 8 additions & 3 deletions
@@ -495,7 +495,7 @@ def to_clipboard(self):
         from pandas.io import clipboard
         clipboard.to_clipboard(self)
 
-    def to_json(self, orient=None, double_precision=10,
+    def to_json(self, path_or_buf, orient=None, double_precision=10,
                 force_ascii=True):
         """
         Convert the object to a JSON string.
@@ -505,6 +505,8 @@ def to_json(self, orient=None, double_precision=10,
 
         Parameters
        ----------
+        path_or_buf : the path or buffer to write the result string
+            if this is None, return a StringIO of the converted string
 orient : {'split', 'records', 'index', 'columns', 'values'},
             default is 'index' for Series, 'columns' for DataFrame
 
@@ -521,10 +523,13 @@ def to_json(self, orient=None, double_precision=10,
 
         Returns
         -------
-        result : JSON compatible string
+        result : a JSON compatible string written to the path_or_buf;
+            if the path_or_buf is none, return a StringIO of the result
+
         """
+
         from pandas.io import json
-        return json.to_json(self, orient=orient, double_precision=double_precision,
+        return json.to_json(path_or_buf, self, orient=orient, double_precision=double_precision,
                             force_ascii=force_ascii)
 
 # install the indexerse
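
Since path_or_buf falls through to a plain .write() call when it is neither a string nor None (see pandas/io/json.py below), an already-open handle or StringIO also works. A minimal sketch, with the 'frame.json' file name assumed:

    from StringIO import StringIO
    from pandas import DataFrame

    df = DataFrame({'A': [1, 2], 'B': [3, 4]})

    # write into an existing buffer rather than a path
    sio = StringIO()
    df.to_json(sio)
    print(sio.getvalue())

    # or into an open file handle
    with open('frame.json', 'w') as fh:
        df.to_json(fh)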

pandas/io/common.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 
 import urlparse
 from pandas.util import py3compat
+from StringIO import StringIO
 
 _VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc +
                   urlparse.uses_params)

pandas/io/excel.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 
 from pandas.io.parsers import TextParser
 from pandas.tseries.period import Period
-import json
+from pandas import json
 
 def read_excel(path_or_buf, sheetname, kind=None, **kwds):
     """Read an Excel table into a pandas DataFrame

pandas/io/json.py

Lines changed: 34 additions & 7 deletions
@@ -1,23 +1,27 @@
 
 # pylint: disable-msg=E1101,W0613,W0603
 from pandas import Series, DataFrame
+from pandas.io.common import get_filepath_or_buffer
+from StringIO import StringIO
 
 import pandas.json as _json
 loads = _json.loads
 dumps = _json.dumps
 
 ### interface to/from ###
 
-def to_json(obj, orient=None, double_precision=10,
+def to_json(path_or_buf, obj, orient=None, double_precision=10,
             force_ascii=True):
     """
-    Convert the object to a JSON string.
+    Convert the object to a JSON string
 
     Note NaN's and None will be converted to null and datetime objects
     will be converted to UNIX timestamps.
 
     Parameters
     ----------
+    path_or_buf : the pathname or buffer to write the output
+        if this is None, return a StringIO of the converted string
     orient : {'split', 'records', 'index', 'columns', 'values'},
         default is 'index' for Series, 'columns' for DataFrame
 
@@ -34,7 +38,9 @@ def to_json(obj, orient=None, double_precision=10,
 
     Returns
     -------
-    result : JSON compatible string
+    result : a JSON compatible string written to the path_or_buf;
+        if the path_or_buf is none, return a StringIO of the result
+
     """
 
     if orient is None:
@@ -43,16 +49,27 @@ def to_json(obj, orient=None, double_precision=10,
     elif isinstance(obj, DataFrame):
         orient = 'columns'
 
-    return dumps(obj, orient=orient, double_precision=double_precision,
-                 ensure_ascii=force_ascii)
+    s = dumps(obj, orient=orient, double_precision=double_precision,
+              ensure_ascii=force_ascii)
+    if isinstance(path_or_buf, basestring):
+        with open(path_or_buf,'w') as fh:
+            fh.write(s)
+    elif path_or_buf is None:
+        return StringIO(s)
+    else:
+        path_or_buf.write(s)
 
-def read_json(json, typ='frame', orient=None, dtype=None, numpy=True):
+def read_json(filepath_or_buffer=None, json=None, typ='frame', orient=None, dtype=None, numpy=True):
     """
     Convert JSON string to pandas object
 
     Parameters
     ----------
-    json : The JSON string to parse.
+    filepath_or_buffer : a VALID JSON StringIO or file handle / StringIO. The string could be
+        a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
+        is expected. For instance, a local file could be
+        file ://localhost/path/to/table.json
+    json : a VALID JSON string, optional, used if filepath_or_buffer is not provided
     typ : type of object to recover (series or frame), default 'frame'
     orient : {'split', 'records', 'index'}, default 'index'
         The format of the JSON string
@@ -69,6 +86,16 @@ def read_json(json, typ='frame', orient=None, dtype=None, numpy=True):
     result : Series or DataFrame
     """
 
+    if json is None:
+        filepath_or_buffer,_ = get_filepath_or_buffer(filepath_or_buffer)
+        if isinstance(filepath_or_buffer, basestring):
+            with open(filepath_or_buffer,'r') as fh:
+                json = fh.read()
+        elif hasattr(filepath_or_buffer, 'read'):
+            json = filepath_or_buffer.read()
+        else:
+            json = filepath_or_buffer
+
     obj = None
     if typ == 'frame':
         if orient is None:
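
The read side resolves its input in a fixed order: an explicit json= string wins; otherwise filepath_or_buffer is passed through get_filepath_or_buffer (which fetches URLs), then opened as a path if it is still a string, read via .read() if it is file-like, and used as-is otherwise. A sketch of the equivalent calls (the 'table.json' file name is an assumption):

    from StringIO import StringIO
    import pandas as pd

    payload = '{"0":{"0":1,"1":3},"1":{"0":2,"1":4}}'

    df1 = pd.read_json(json=payload)        # explicit JSON string
    df2 = pd.read_json(StringIO(payload))   # anything with a .read() method

    with open('table.json', 'w') as fh:     # a plain path on disk
        fh.write(payload)
    df3 = pd.read_json('table.json')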

pandas/io/parsers.py

Lines changed: 0 additions & 1 deletion
@@ -23,7 +23,6 @@
 import pandas.tslib as tslib
 import pandas.parser as _parser
 from pandas.tseries.period import Period
-import json
 
 
 class DateConversionError(Exception):

pandas/io/tests/test_json/test_pandas.py

mode change 100644 -> 100755
Lines changed: 44 additions & 26 deletions
@@ -15,8 +15,9 @@
 read_json = pd.read_json
 
 from pandas.util.testing import (assert_almost_equal, assert_frame_equal,
-                                 assert_series_equal)
+                                 assert_series_equal, network)
 import pandas.util.testing as tm
+from numpy.testing.decorators import slow
 
 _seriesd = tm.getSeriesData()
 _tsd = tm.getTimeSeriesData()
@@ -56,7 +57,7 @@ def test_frame_from_json_to_json(self):
 
         def _check_orient(df, orient, dtype=None, numpy=True):
             df = df.sort()
-            dfjson = df.to_json(orient=orient)
+            dfjson = df.to_json(None, orient=orient)
             unser = read_json(dfjson, orient=orient, dtype=dtype,
                               numpy=numpy)
             unser = unser.sort()
@@ -93,8 +94,8 @@ def _check_all_orients(df, dtype=None):
 
         # basic
         _check_all_orients(self.frame)
-        self.assertEqual(self.frame.to_json(),
-                         self.frame.to_json(orient="columns"))
+        self.assertEqual(self.frame.to_json(None).read(),
+                         self.frame.to_json(None,orient="columns").read())
 
         _check_all_orients(self.intframe, dtype=self.intframe.values.dtype)
 
@@ -138,61 +139,61 @@ def _check_all_orients(df, dtype=None):
         _check_orient(df.transpose().transpose(), "index")
 
     def test_frame_from_json_bad_data(self):
-        self.assertRaises(ValueError, read_json, '{"key":b:a:d}')
+        self.assertRaises(ValueError, read_json, StringIO('{"key":b:a:d}'))
 
         # too few indices
-        json = ('{"columns":["A","B"],'
-                '"index":["2","3"],'
-                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
+        json = StringIO('{"columns":["A","B"],'
+                        '"index":["2","3"],'
+                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
         self.assertRaises(ValueError, read_json, json,
                           orient="split")
 
         # too many columns
-        json = ('{"columns":["A","B","C"],'
-                '"index":["1","2","3"],'
-                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
+        json = StringIO('{"columns":["A","B","C"],'
+                        '"index":["1","2","3"],'
+                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
         self.assertRaises(AssertionError, read_json, json,
                           orient="split")
 
         # bad key
-        json = ('{"badkey":["A","B"],'
-                '"index":["2","3"],'
-                '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
+        json = StringIO('{"badkey":["A","B"],'
+                        '"index":["2","3"],'
+                        '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}"')
         self.assertRaises(TypeError, read_json, json,
                           orient="split")
 
     def test_frame_from_json_nones(self):
         df = DataFrame([[1, 2], [4, 5, 6]])
-        unser = read_json(df.to_json())
+        unser = read_json(df.to_json(None))
         self.assert_(np.isnan(unser['2'][0]))
 
         df = DataFrame([['1', '2'], ['4', '5', '6']])
-        unser = read_json(df.to_json())
+        unser = read_json(df.to_json(None))
         self.assert_(unser['2'][0] is None)
 
-        unser = read_json(df.to_json(), numpy=False)
+        unser = read_json(df.to_json(None), numpy=False)
         self.assert_(unser['2'][0] is None)
 
         # infinities get mapped to nulls which get mapped to NaNs during
         # deserialisation
         df = DataFrame([[1, 2], [4, 5, 6]])
         df[2][0] = np.inf
-        unser = read_json(df.to_json())
+        unser = read_json(df.to_json(None))
         self.assert_(np.isnan(unser['2'][0]))
 
         df[2][0] = np.NINF
-        unser = read_json(df.to_json())
+        unser = read_json(df.to_json(None))
         self.assert_(np.isnan(unser['2'][0]))
 
     def test_frame_to_json_except(self):
         df = DataFrame([1, 2, 3])
-        self.assertRaises(ValueError, df.to_json, orient="garbage")
+        self.assertRaises(ValueError, df.to_json, None, orient="garbage")
 
     def test_series_from_json_to_json(self):
 
         def _check_orient(series, orient, dtype=None, numpy=True):
             series = series.sort_index()
-            unser = read_json(series.to_json(orient=orient), typ='series',
+            unser = read_json(series.to_json(None,orient=orient), typ='series',
                               orient=orient, numpy=numpy, dtype=dtype)
             unser = unser.sort_index()
             if series.index.dtype.type == np.datetime64:
@@ -222,8 +223,8 @@ def _check_all_orients(series, dtype=None):
 
         # basic
         _check_all_orients(self.series)
-        self.assertEqual(self.series.to_json(),
-                         self.series.to_json(orient="index"))
+        self.assertEqual(self.series.to_json(None).read(),
+                         self.series.to_json(None,orient="index").read())
 
         objSeries = Series([str(d) for d in self.objSeries],
                            index=self.objSeries.index,
@@ -239,18 +240,35 @@ def _check_all_orients(series, dtype=None):
 
     def test_series_to_json_except(self):
         s = Series([1, 2, 3])
-        self.assertRaises(ValueError, s.to_json, orient="garbage")
+        self.assertRaises(ValueError, s.to_json, None, orient="garbage")
 
     def test_typ(self):
 
         s = Series(range(6), index=['a','b','c','d','e','f'])
-        result = read_json(s.to_json(),typ=None)
+        result = read_json(s.to_json(None),typ=None)
         assert_series_equal(result,s)
 
     def test_reconstruction_index(self):
 
         df = DataFrame([[1, 2, 3], [4, 5, 6]])
-        result = read_json(df.to_json())
+        result = read_json(df.to_json(None))
 
         # the index is serialized as strings....correct?
         #assert_frame_equal(result,df)
+
+    @network
+    @slow
+    def test_url(self):
+        import urllib2
+        try:
+            # HTTP(S)
+            url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5'
+            result = read_json(url)
+            #print result
+
+            url = 'http://search.twitter.com/search.json?q=pandas%20python'
+            result = read_json(url)
+            #print result
+
+        except urllib2.URLError:
+            raise nose.SkipTest