From dc4944d120755bc61050cd53e22c9b3d8a672a2e Mon Sep 17 00:00:00 2001 From: Ravi Kumar Nimmi Date: Thu, 16 Jun 2016 21:46:34 +0530 Subject: [PATCH] BUG: fix to_datetime to handle int16 and int8 Fixes #13451 --- doc/source/whatsnew/v0.18.2.txt | 2 ++ pandas/tseries/tests/test_timeseries.py | 27 +++++++++++++++++++++++++ pandas/tseries/tools.py | 6 +++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index db2bccf6ac349..8d2e9bf4c1ae6 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -517,3 +517,5 @@ Bug Fixes - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) + +- Bug in ``pd.to_datetime()`` raising ``ValueError`` when passed a ``DataFrame`` with ``int8`` or ``int16`` columns (:issue:`13451`) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index fcc544ec7f239..b0caa1f6a77cb 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2563,6 +2563,33 @@ def test_dataframe(self): with self.assertRaises(ValueError): to_datetime(df2) + def test_dataframe_dtypes(self): + # #13451 + df = DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5]}) + + # int16 + result = to_datetime(df.astype('int16')) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # mixed dtypes + df['month'] = df['month'].astype('int8') + df['day'] = df['day'].astype('int8') + result = to_datetime(df) + expected = Series([Timestamp('20150204 00:00:00'), + Timestamp('20160305 00:00:00')]) + assert_series_equal(result, expected) + + # float + df = DataFrame({'year': [2000, 2001], + 'month': [1.5, 1], + 'day': [1, 1]}) + with self.assertRaises(ValueError): + to_datetime(df) + class TestDatetimeIndex(tm.TestCase): _multiprocess_can_split_ = True diff --git
a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d5e87d1df2462..01b1c8a852215 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -508,7 +508,11 @@ def f(value): def coerce(values): # we allow coercion to if errors allows - return to_numeric(values, errors=errors) + values = to_numeric(values, errors=errors) + # prevent overflow in case of int8 or int16 + if com.is_integer_dtype(values): + values = values.astype('int64', copy=False) + return values values = (coerce(arg[unit_rev['year']]) * 10000 + coerce(arg[unit_rev['month']]) * 100 +