Skip to content

BUG: to_datetime with unit with Int64 #30241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ Datetimelike
- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`)
- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`)
- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`)

Timedelta
^^^^^^^^^
Expand Down
36 changes: 29 additions & 7 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -296,17 +296,34 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
return result


def array_with_unit_to_datetime(ndarray values, object unit,
def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit,
str errors='coerce'):
"""
convert the ndarray according to the unit
Convert the ndarray to datetime according to the time unit.

This function converts an array of objects into a numpy array of
datetime64[ns]. It returns the converted array
and also returns the timezone offset

if errors:
- raise: return converted values or raise OutOfBoundsDatetime
if out of range on the conversion or
ValueError for other conversions (e.g. a string)
- ignore: return non-convertible values as the same unit
- coerce: NaT for non-convertibles

Parameters
----------
values : ndarray of object
Date-like objects to convert
mask : ndarray of bool
Not-a-time mask used when converting nullable integer types,
can be None
unit : object
Time unit to use during conversion
errors : str, default 'coerce'
Error behavior when parsing

Returns
-------
result : ndarray of m8 values
Expand All @@ -316,7 +333,6 @@ def array_with_unit_to_datetime(ndarray values, object unit,
Py_ssize_t i, j, n=len(values)
int64_t m
ndarray[float64_t] fvalues
ndarray mask
bint is_ignore = errors=='ignore'
bint is_coerce = errors=='coerce'
bint is_raise = errors=='raise'
Expand All @@ -329,9 +345,13 @@ def array_with_unit_to_datetime(ndarray values, object unit,

if unit == 'ns':
if issubclass(values.dtype.type, np.integer):
return values.astype('M8[ns]'), tz
# This will return a tz
return array_to_datetime(values.astype(object), errors=errors)
result = values.astype('M8[ns]')
else:
result, tz = array_to_datetime(values.astype(object), errors=errors)
if mask is not None:
iresult = result.view('i8')
iresult[mask] = NPY_NAT
return result, tz

m = cast_from_unit(None, unit)

Expand All @@ -343,7 +363,9 @@ def array_with_unit_to_datetime(ndarray values, object unit,
if values.dtype.kind == "i":
# Note: this condition makes the casting="same_kind" redundant
iresult = values.astype('i8', casting='same_kind', copy=False)
mask = iresult == NPY_NAT
# If no mask, fill mask by comparing to NPY_NAT constant
if mask is None:
mask = iresult == NPY_NAT
iresult[mask] = 0
fvalues = iresult.astype('f8') * m
need_to_iterate = False
Expand Down
18 changes: 16 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
)
from pandas.core.dtypes.missing import notna

from pandas.arrays import IntegerArray
from pandas.core import algorithms
from pandas.core.algorithms import unique

Expand Down Expand Up @@ -316,8 +317,21 @@ def _convert_listlike_datetimes(
elif unit is not None:
if format is not None:
raise ValueError("cannot specify both format and unit")
arg = getattr(arg, "values", arg)
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
arg = getattr(arg, "_values", arg)

# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
# because it expects an ndarray argument
if isinstance(arg, IntegerArray):
# Explicitly pass NaT mask to array_with_unit_to_datetime
mask = arg.isna()
arg = arg._ndarray_values
else:
mask = None

result, tz_parsed = tslib.array_with_unit_to_datetime(
arg, mask, unit, errors=errors
)

if errors == "ignore":
from pandas import Index

Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2291,3 +2291,25 @@ def test_should_cache_errors(unique_share, check_count, err_message):

with pytest.raises(AssertionError, match=err_message):
tools.should_cache(arg, unique_share, check_count)


def test_nullable_integer_to_datetime():
    """
    Regression test for GH 30050.

    ``to_datetime`` called with ``unit`` on a Series backed by a nullable
    ``Int64`` array must return a datetime Series in which missing entries
    become ``NaT``, and it must leave the input Series untouched.
    """
    nullable_ints = pd.Series([1, 2, None, 2 ** 61, None]).astype("Int64")
    # Snapshot taken before the conversion so the input can be compared
    # against it afterwards.
    snapshot = nullable_ints.copy()

    converted = pd.to_datetime(nullable_ints, unit="ns")

    # Integers are nanoseconds since the epoch; missing values map to NaT.
    timestamps = [
        np.datetime64("1970-01-01 00:00:00.000000001"),
        np.datetime64("1970-01-01 00:00:00.000000002"),
        np.datetime64("NaT"),
        np.datetime64("2043-01-25 23:56:49.213693952"),
        np.datetime64("NaT"),
    ]
    expected = pd.Series(timestamps)

    tm.assert_series_equal(converted, expected)
    # The conversion must not have modified the caller's Series.
    tm.assert_series_equal(nullable_ints, snapshot)