From 2a7316d33ae6759352984b0e8f9e2afb5ca99090 Mon Sep 17 00:00:00 2001
From: Thomas Li
Date: Thu, 22 Apr 2021 15:47:25 -0700
Subject: [PATCH] BUG: read_json not reading in large ints properly

---
 doc/source/whatsnew/v1.3.0.rst      |  1 +
 pandas/io/json/_json.py             | 17 +++++++++++------
 pandas/tests/io/json/test_pandas.py |  7 +++++++
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 1a11fffbf6b4e..900e8563440c3 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -784,6 +784,7 @@ I/O
 - Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
 - Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
+- Bug in :func:`read_json` reading large integers incorrectly if dtype is not specified (:issue:`20608`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index b7493ebeadf34..e5e79729e71a9 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -947,12 +947,17 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                 return new_data, True
 
         if data.dtype == "object":
-
-            # try float
-            try:
+            if len(data) == 0:
                 data = data.astype("float64")
-            except (TypeError, ValueError):
-                pass
+            else:
+                try:
+                    data = data.astype("int64")
+                except (TypeError, ValueError, OverflowError):
+                    try:
+                        # Maybe out of integer range, try float
+                        data = data.astype("float64")
+                    except (TypeError, ValueError, OverflowError):
+                        pass
 
         if data.dtype.kind == "f":
 
@@ -965,7 +970,7 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True):
                     pass
 
         # don't coerce 0-len data
-        if len(data) and (data.dtype == "float" or data.dtype == "object"):
+        if len(data) and data.dtype == "float":
 
             # coerce ints if we can
             try:
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 3bd78d44a0b04..49de6c7ead984 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1300,6 +1300,13 @@ def test_read_json_large_numbers2(self):
         expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
         tm.assert_frame_equal(result, expected)
 
+    def test_large_ints_from_json_strings(self, orient):
+        # GH 20608
+        expected = DataFrame([9999999999999999, 10000000000000001])
+        df_temp = expected.copy().astype(str)
+        result = read_json(df_temp.to_json(orient=orient), orient=orient)
+        tm.assert_frame_equal(result, expected)
+
     def test_to_jsonl(self):
         # GH9180
         df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])