From 20d75d0d2a5c67916c91b9e34413bce9f3cee563 Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Fri, 21 Aug 2015 04:27:33 -0400 Subject: [PATCH] BUG: GH10747 where 'timestamp' is not inferred to be datetime column name --- doc/source/io.rst | 8 +++++++- pandas/io/json.py | 25 ++++++++++++++++++------ pandas/io/tests/test_json/test_pandas.py | 15 +++++++++++--- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 2f2c4c7566413..70e7154493ccf 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1484,7 +1484,13 @@ be set to ``False`` if you need to preserve string-like numbers (e.g. '1', '2') .. note:: - Large integer values may be converted to dates if ``convert_dates=True`` and the data and / or column labels appear 'date-like'. The exact threshold depends on the ``date_unit`` specified. + Large integer values may be converted to dates if ``convert_dates=True`` and the data and / or column labels appear 'date-like'. The exact threshold depends on the ``date_unit`` specified. 'date-like' means that the column label meets one of the following criteria: + + * it ends with ``'_at'`` + * it ends with ``'_time'`` + * it begins with ``'timestamp'`` + * it is ``'modified'`` + * it is ``'date'`` or ``'datetime'`` .. warning:: diff --git a/pandas/io/json.py b/pandas/io/json.py index 81a916e058b3d..d6310d81ab87f 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -150,7 +150,18 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, Try to convert the axes to the proper dtypes. convert_dates : boolean, default True List of columns to parse for dates; If True, then try to parse - datelike columns default is True + datelike columns default is True; a column label is datelike if + + * it ends with ``'_at'``, + + * it ends with ``'_time'``, + + * it begins with ``'timestamp'``, + + * it is ``'modified'``, or + + * it is ``'date'`` or ``'datetime'`` + keep_default_dates : boolean, default True. 
If parsing dates, then parse the default datelike columns numpy : boolean, default False @@ -543,11 +554,13 @@ def is_ok(col): if not isinstance(col, compat.string_types): return False - if (col.endswith('_at') or - col.endswith('_time') or - col.lower() == 'modified' or - col.lower() == 'date' or - col.lower() == 'datetime'): + col_lower = col.lower() + if (col_lower.endswith('_at') or + col_lower.endswith('_time') or + col_lower == 'modified' or + col_lower == 'date' or + col_lower == 'datetime' or + col_lower.startswith('timestamp')): return True return False diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index c577286ceca9a..40cdc8fe8478c 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -1,13 +1,12 @@ # pylint: disable-msg=W0612,E1101 from pandas.compat import range, lrange, StringIO, OrderedDict -from pandas import compat import os import numpy as np -from pandas import Series, DataFrame, DatetimeIndex, Timestamp, CategoricalIndex +from pandas import (Series, DataFrame, DatetimeIndex, Timestamp, CategoricalIndex, + read_json, compat) from datetime import timedelta import pandas as pd -read_json = pd.read_json from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network, @@ -574,6 +573,16 @@ def test_convert_dates(self): result = read_json(json, typ='series') assert_series_equal(result, ts) + def test_convert_dates_infer(self): + #GH10747 + infer_words = ['trade_time', 'date', 'datetime', 'sold_at', + 'modified', 'timestamp', 'timestamps'] + for infer_word in infer_words: + data = [{'id': 1, infer_word: 1036713600000}, {'id': 2}] + expected = DataFrame([[1, Timestamp('2002-11-08')], [2, pd.NaT]], columns=['id', infer_word]) + result = read_json(pd.json.dumps(data))[['id', infer_word]] + assert_frame_equal(result, expected) + def test_date_format_frame(self): df = self.tsframe.copy()