diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fb1a5070c6c0d..a8d384b956c91 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -29,12 +29,14 @@ astype_overflowsafe, fields, get_resolution, + get_supported_reso, get_unit_from_dtype, ints_to_pydatetime, is_date_array_normalized, is_supported_unit, is_unitless, normalize_i8_timestamps, + npy_unit_to_abbrev, timezones, to_offset, tz_convert_from_utc, @@ -321,6 +323,14 @@ def _from_sequence_not_strict( # if dtype has an embedded tz, capture it tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none) + unit = None + if dtype is not None: + if isinstance(dtype, np.dtype): + unit = np.datetime_data(dtype)[0] + else: + # DatetimeTZDtype + unit = dtype.unit + subarr, tz, inferred_freq = _sequence_to_dt64ns( data, copy=copy, @@ -341,8 +351,12 @@ def _from_sequence_not_strict( if explicit_none: freq = None - dtype = tz_to_dtype(tz) - result = cls._simple_new(subarr, freq=freq, dtype=dtype) + data_unit = np.datetime_data(subarr.dtype)[0] + data_dtype = tz_to_dtype(tz, data_unit) + result = cls._simple_new(subarr, freq=freq, dtype=data_dtype) + if unit is not None and unit != result._unit: + # If unit was specified in user-passed dtype, cast to it here + result = result._as_unit(unit) if inferred_freq is None and freq is not None: # this condition precludes `freq_infer` @@ -2004,7 +2018,8 @@ def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray: require_iso8601=require_iso8601, ) - dtype = tz_to_dtype(tz) + unit = np.datetime_data(result.dtype)[0] + dtype = tz_to_dtype(tz, unit) dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) return dta @@ -2110,8 +2125,21 @@ def _sequence_to_dt64ns( elif is_datetime64_dtype(data_dtype): # tz-naive DatetimeArray or ndarray[datetime64] data = getattr(data, "_ndarray", data) - if data.dtype != DT64NS_DTYPE: - data = astype_overflowsafe(data, dtype=DT64NS_DTYPE) + new_dtype = data.dtype + data_unit = get_unit_from_dtype(new_dtype) + if not is_supported_unit(data_unit): + # Cast to the nearest supported unit, generally "s" + new_reso = get_supported_reso(data_unit) + new_unit = npy_unit_to_abbrev(new_reso) + new_dtype = np.dtype(f"M8[{new_unit}]") + data = astype_overflowsafe(data, dtype=new_dtype, copy=False) + copy = False + + if data.dtype.byteorder == ">": + # TODO: better way to handle this? non-copying alternative? + # without this, test_constructor_datetime64_bigendian fails + data = data.astype(data.dtype.newbyteorder("<")) + new_dtype = data.dtype copy = False if tz is not None: @@ -2119,11 +2147,11 @@ def _sequence_to_dt64ns( # TODO: if tz is UTC, are there situations where we *don't* want a # copy? tz_localize_to_utc always makes one. data = tzconversion.tz_localize_to_utc( - data.view("i8"), tz, ambiguous=ambiguous + data.view("i8"), tz, ambiguous=ambiguous, reso=data_unit ) - data = data.view(DT64NS_DTYPE) + data = data.view(new_dtype) - assert data.dtype == DT64NS_DTYPE, data.dtype + assert data.dtype == new_dtype, data.dtype result = data else: @@ -2137,7 +2165,9 @@ def _sequence_to_dt64ns( result = result.copy() assert isinstance(result, np.ndarray), type(result) - assert result.dtype == "M8[ns]", result.dtype + assert result.dtype.kind == "M" + assert result.dtype != "M8" + assert is_supported_unit(get_unit_from_dtype(result.dtype)) return result, tz, inferred_freq @@ -2358,12 +2388,14 @@ def _validate_dt64_dtype(dtype): ) raise ValueError(msg) - if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance( - dtype, (np.dtype, DatetimeTZDtype) - ): + if ( + isinstance(dtype, np.dtype) + and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype))) + ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)): raise ValueError( f"Unexpected value for 'dtype': '{dtype}'. " - "Must be 'datetime64[ns]' or DatetimeTZDtype'." + "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', " + "'datetime64[ns]' or DatetimeTZDtype'." ) if getattr(dtype, "tz", None): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bd937a7bbb3a6..5066c307a1e38 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1373,6 +1373,9 @@ def maybe_cast_to_datetime( # Note: NOT equivalent to dta.astype(dtype) dta = dta.tz_localize(None) + # TODO(2.0): Do this astype in sequence_to_datetimes to + # avoid potential extra copy? + dta = dta.astype(dtype, copy=False) value = dta elif is_datetime64tz: dtype = cast(DatetimeTZDtype, dtype) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 633a763dab80a..d11f4648ec632 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -6,11 +6,6 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - is_platform_windows, -) - from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -749,10 +744,6 @@ def test_from_sequence_copy(self): assert not tm.shares_memory(result, cat) - @pytest.mark.xfail( - not IS64 or is_platform_windows(), - reason="Incorrectly raising in astype_overflowsafe", - ) def test_constructor_datetime64_non_nano(self): categories = np.arange(10).view("M8[D]") values = categories[::2].copy() diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 6a659c4aee0b1..14ca7a61c50f5 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -242,7 +242,9 @@ def test_array_copy(): ), ( np.array([1, 2], dtype="M8[us]"), - DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")), + DatetimeArray._simple_new( + np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]") + ), ), # datetimetz ( diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index f66ebc451c239..c8b923031b9e8 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -146,9 +146,9 @@ def test_constructor_datetime_outofbound(self, a, constructor): # datetime64[non-ns] raise error, other cases result in object dtype # and preserve original data if a.dtype.kind == "M": - msg = "Out of bounds" - with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg): - constructor(a) + # Can't fit in nanosecond bounds -> get the nearest supported unit + result = constructor(a) + assert result.dtype == "M8[s]" else: result = constructor(a) assert result.dtype == "object" @@ -162,7 +162,10 @@ def test_constructor_datetime_outofbound(self, a, constructor): def test_constructor_datetime_nonns(self, constructor): arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]") - expected = constructor(pd.to_datetime(["2020-01-01"])) + dta = pd.core.arrays.DatetimeArray._simple_new(arr, dtype=arr.dtype) + expected = constructor(dta) + assert expected.dtype == arr.dtype + result = constructor(arr) tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 3b5a0c5a32bc4..b2efa0713b513 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -44,6 +44,7 @@ def test_from_records_with_datetimes(self): dtypes = [("EXPIRY", "