diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3810ab37822cc..3f944b8862417 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -796,6 +796,7 @@ Datetimelike - Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) - Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`) - Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`) +- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index cbe6dd6c2322d..e0a2b987c98d5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -296,10 +296,15 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -def array_with_unit_to_datetime(ndarray values, object unit, +def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, str errors='coerce'): """ - convert the ndarray according to the unit + Convert the ndarray to datetime according to the time unit. + + This function converts an array of objects into a numpy array of + datetime64[ns]. 
It returns the converted array + and also returns the timezone offset + if errors: - raise: return converted values or raise OutOfBoundsDatetime if out of range on the conversion or @@ -307,6 +312,18 @@ def array_with_unit_to_datetime(ndarray values, object unit, - ignore: return non-convertible values as the same unit - coerce: NaT for non-convertibles + Parameters + ---------- + values : ndarray of object + Date-like objects to convert + mask : ndarray of bool + Not-a-time mask for nullable integer types conversion, + can be None + unit : object + Time unit to use during conversion + errors : str, default 'coerce' + Error behavior when parsing + Returns ------- result : ndarray of m8 values @@ -316,7 +333,6 @@ def array_with_unit_to_datetime(ndarray values, object unit, Py_ssize_t i, j, n=len(values) int64_t m ndarray[float64_t] fvalues - ndarray mask bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' bint is_raise = errors=='raise' @@ -329,9 +345,13 @@ def array_with_unit_to_datetime(ndarray values, object unit, if unit == 'ns': if issubclass(values.dtype.type, np.integer): - return values.astype('M8[ns]'), tz - # This will return a tz - return array_to_datetime(values.astype(object), errors=errors) + result = values.astype('M8[ns]') + else: + result, tz = array_to_datetime(values.astype(object), errors=errors) + if mask is not None: + iresult = result.view('i8') + iresult[mask] = NPY_NAT + return result, tz m = cast_from_unit(None, unit) @@ -343,7 +363,9 @@ def array_with_unit_to_datetime(ndarray values, object unit, if values.dtype.kind == "i": # Note: this condition makes the casting="same_kind" redundant iresult = values.astype('i8', casting='same_kind', copy=False) - mask = iresult == NPY_NAT + # If no mask, fill mask by comparing to NPY_NAT constant + if mask is None: + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False diff --git a/pandas/core/tools/datetimes.py
b/pandas/core/tools/datetimes.py index f193865d90b71..85094ce741134 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -38,6 +38,7 @@ ) from pandas.core.dtypes.missing import notna +from pandas.arrays import IntegerArray from pandas.core import algorithms from pandas.core.algorithms import unique @@ -316,8 +317,21 @@ def _convert_listlike_datetimes( elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") - arg = getattr(arg, "values", arg) - result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + arg = getattr(arg, "_values", arg) + + # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime + # because it expects an ndarray argument + if isinstance(arg, IntegerArray): + # Explicitly pass NaT mask to array_with_unit_to_datetime + mask = arg.isna() + arg = arg._ndarray_values + else: + mask = None + + result, tz_parsed = tslib.array_with_unit_to_datetime( + arg, mask, unit, errors=errors + ) + if errors == "ignore": from pandas import Index diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 1aaacfc0949c3..807d0b05e8d13 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -2291,3 +2291,25 @@ def test_should_cache_errors(unique_share, check_count, err_message): with pytest.raises(AssertionError, match=err_message): tools.should_cache(arg, unique_share, check_count) + + +def test_nullable_integer_to_datetime(): + # Test for #30050 + ser = pd.Series([1, 2, None, 2 ** 61, None]) + ser = ser.astype("Int64") + ser_copy = ser.copy() + + res = pd.to_datetime(ser, unit="ns") + + expected = pd.Series( + [ + np.datetime64("1970-01-01 00:00:00.000000001"), + np.datetime64("1970-01-01 00:00:00.000000002"), + np.datetime64("NaT"), + np.datetime64("2043-01-25 23:56:49.213693952"), + np.datetime64("NaT"), + ] + ) + tm.assert_series_equal(res, expected) + # 
Check that ser isn't mutated + tm.assert_series_equal(ser, ser_copy)