diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 686c5ad0165e7..83addc91a772e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -158,6 +158,7 @@ I/O ^^^ - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) +- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - - - diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 4bbccc8339d7c..725e2d28ffd67 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -226,7 +226,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii, return serialized -def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, +def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, lines=False, chunksize=None, compression='infer'): @@ -278,8 +278,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, typ : type of object to recover (series or frame), default 'frame' dtype : boolean or dict, default True - If True, infer dtypes, if a dict of column to dtype, then use those, + If True, infer dtypes; if a dict of column to dtype, then use those; if False, then don't infer dtypes at all, applies only to the data. + + Not applicable with ``orient='table'``. + + .. versionchanged:: 0.25 + + Not applicable with ``orient='table'``. + convert_axes : boolean, default True Try to convert the axes to the proper dtypes. convert_dates : boolean, default True @@ -408,6 +415,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ + if orient == 'table' and dtype: + raise ValueError("cannot pass both dtype and orient='table'") + + dtype = orient != 'table' if dtype is None else dtype + compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, @@ -600,15 +612,15 @@ class Parser(object): 'us': long(31536000000000), 'ns': long(31536000000000000)} - def __init__(self, json, orient, dtype=True, convert_axes=True, + def __init__(self, json, orient, dtype=None, convert_axes=True, convert_dates=True, keep_default_dates=False, numpy=False, precise_float=False, date_unit=None): self.json = json if orient is None: orient = self._default_orient - self.orient = orient + self.dtype = dtype if orient == "split": diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 6fa3b5b3b2ed4..3002d1dfb5f8a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -502,12 +502,12 @@ class TestTableOrientReader(object): @pytest.mark.parametrize("vals", [ {'ints': [1, 2, 3, 4]}, {'objects': ['a', 'b', 'c', 'd']}, + {'objects': ['1', '2', '3', '4']}, {'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)}, {'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))}, {'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True))}, - pytest.param({'floats': [1., 2., 3., 4.]}, - marks=pytest.mark.xfail), + {'floats': [1., 2., 3., 4.]}, {'floats': [1.1, 2.2, 3.3, 4.4]}, {'bools': [True, False, False, True]}]) def test_read_json_table_orient(self, index_nm, vals, recwarn): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 0ffc8c978a228..fecd0f0572757 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1202,6 +1202,21 @@ def test_data_frame_size_after_to_json(self): assert size_before == size_after + def test_from_json_to_json_table_dtypes(self): + # GH21345 + expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) + dfjson = expected.to_json(orient='table') + result = pd.read_json(dfjson, orient='table') + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [True, {'b': int, 'c': int}]) + def test_read_json_table_dtype_raises(self, dtype): + # GH21345 + df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']}) + dfjson = df.to_json(orient='table') + with pytest.raises(ValueError): + pd.read_json(dfjson, orient='table', dtype=dtype) + @pytest.mark.parametrize('data, expected', [ (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']), {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),