pandas-dev · mroeschke · Oct 18, 2022 · Oct 12, 2022 · Oct 12, 2022 · Oct 12, 2022
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -29,12 +29,14 @@
     astype_overflowsafe,
     fields,
     get_resolution,
+    get_supported_reso,
     get_unit_from_dtype,
     ints_to_pydatetime,
     is_date_array_normalized,
     is_supported_unit,
     is_unitless,
     normalize_i8_timestamps,
+    npy_unit_to_abbrev,
     timezones,
     to_offset,
     tz_convert_from_utc,
@@ -321,6 +323,14 @@ def _from_sequence_not_strict(
         # if dtype has an embedded tz, capture it
         tz = validate_tz_from_dtype(dtype, tz, explicit_tz_none)
 
+        unit = None
+        if dtype is not None:
+            if isinstance(dtype, np.dtype):
+                unit = np.datetime_data(dtype)[0]
+            else:
+                # DatetimeTZDtype
+                unit = dtype.unit
+
         subarr, tz, inferred_freq = _sequence_to_dt64ns(
             data,
             copy=copy,
@@ -341,8 +351,12 @@ def _from_sequence_not_strict(
         if explicit_none:
             freq = None
 
-        dtype = tz_to_dtype(tz)
-        result = cls._simple_new(subarr, freq=freq, dtype=dtype)
+        data_unit = np.datetime_data(subarr.dtype)[0]
+        data_dtype = tz_to_dtype(tz, data_unit)
+        result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
+        if unit is not None and unit != result._unit:
+            # If unit was specified in user-passed dtype, cast to it here
+            result = result._as_unit(unit)
 
         if inferred_freq is None and freq is not None:
             # this condition precludes `freq_infer`
@@ -2004,7 +2018,8 @@ def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray:
         require_iso8601=require_iso8601,
     )
 
-    dtype = tz_to_dtype(tz)
+    unit = np.datetime_data(result.dtype)[0]
+    dtype = tz_to_dtype(tz, unit)
     dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype)
     return dta
 
@@ -2110,20 +2125,33 @@ def _sequence_to_dt64ns(
     elif is_datetime64_dtype(data_dtype):
         # tz-naive DatetimeArray or ndarray[datetime64]
         data = getattr(data, "_ndarray", data)
-        if data.dtype != DT64NS_DTYPE:
-            data = astype_overflowsafe(data, dtype=DT64NS_DTYPE)
+        new_dtype = data.dtype
+        data_unit = get_unit_from_dtype(new_dtype)
+        if not is_supported_unit(data_unit):
+            # Cast to the nearest supported unit, generally "s"
+            new_reso = get_supported_reso(data_unit)
+            new_unit = npy_unit_to_abbrev(new_reso)
+            new_dtype = np.dtype(f"M8[{new_unit}]")
+            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
+            copy = False
+
+        if data.dtype.byteorder == ">":
+            # TODO: better way to handle this?  non-copying alternative?
+            #  without this, test_constructor_datetime64_bigendian fails
+            data = data.astype(data.dtype.newbyteorder("<"))
+            new_dtype = data.dtype
             copy = False
 
         if tz is not None:
             # Convert tz-naive to UTC
             # TODO: if tz is UTC, are there situations where we *don't* want a
             #  copy?  tz_localize_to_utc always makes one.
             data = tzconversion.tz_localize_to_utc(
-                data.view("i8"), tz, ambiguous=ambiguous
+                data.view("i8"), tz, ambiguous=ambiguous, reso=data_unit
             )
-            data = data.view(DT64NS_DTYPE)
+            data = data.view(new_dtype)
 
-        assert data.dtype == DT64NS_DTYPE, data.dtype
+        assert data.dtype == new_dtype, data.dtype
         result = data
 
     else:
@@ -2137,7 +2165,9 @@ def _sequence_to_dt64ns(
         result = result.copy()
 
     assert isinstance(result, np.ndarray), type(result)
-    assert result.dtype == "M8[ns]", result.dtype
+    assert result.dtype.kind == "M"
+    assert result.dtype != "M8"
+    assert is_supported_unit(get_unit_from_dtype(result.dtype))
     return result, tz, inferred_freq
 
 
@@ -2358,12 +2388,14 @@ def _validate_dt64_dtype(dtype):
             )
             raise ValueError(msg)
 
-        if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance(
-            dtype, (np.dtype, DatetimeTZDtype)
-        ):
+        if (
+            isinstance(dtype, np.dtype)
+            and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype)))
+        ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
             raise ValueError(
                 f"Unexpected value for 'dtype': '{dtype}'. "
-                "Must be 'datetime64[ns]' or DatetimeTZDtype'."
+                "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
+                "'datetime64[ns]' or DatetimeTZDtype'."
             )
 
         if getattr(dtype, "tz", None):

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1373,6 +1373,9 @@ def maybe_cast_to_datetime(
                             # Note: NOT equivalent to dta.astype(dtype)
                             dta = dta.tz_localize(None)
 
+                        # TODO(2.0): Do this astype in sequence_to_datetimes to
+                        #  avoid potential extra copy?
+                        dta = dta.astype(dtype, copy=False)
                         value = dta
                     elif is_datetime64tz:
                         dtype = cast(DatetimeTZDtype, dtype)

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -6,11 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import (
-    IS64,
-    is_platform_windows,
-)
-
 from pandas.core.dtypes.common import (
     is_float_dtype,
     is_integer_dtype,
@@ -749,10 +744,6 @@ def test_from_sequence_copy(self):
 
         assert not tm.shares_memory(result, cat)
 
-    @pytest.mark.xfail(
-        not IS64 or is_platform_windows(),
-        reason="Incorrectly raising in astype_overflowsafe",
-    )
     def test_constructor_datetime64_non_nano(self):
         categories = np.arange(10).view("M8[D]")
         values = categories[::2].copy()

diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -242,7 +242,9 @@ def test_array_copy():
         ),
         (
             np.array([1, 2], dtype="M8[us]"),
-            DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")),
+            DatetimeArray._simple_new(
+                np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
+            ),
         ),
         # datetimetz
         (

diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py
@@ -146,9 +146,9 @@ def test_constructor_datetime_outofbound(self, a, constructor):
         # datetime64[non-ns] raise error, other cases result in object dtype
         # and preserve original data
         if a.dtype.kind == "M":
-            msg = "Out of bounds"
-            with pytest.raises(pd.errors.OutOfBoundsDatetime, match=msg):
-                constructor(a)
+            # Can't fit in nanosecond bounds -> get the nearest supported unit
+            result = constructor(a)
+            assert result.dtype == "M8[s]"
         else:
             result = constructor(a)
             assert result.dtype == "object"
@@ -162,7 +162,10 @@ def test_constructor_datetime_outofbound(self, a, constructor):
 
     def test_constructor_datetime_nonns(self, constructor):
         arr = np.array(["2020-01-01T00:00:00.000000"], dtype="datetime64[us]")
-        expected = constructor(pd.to_datetime(["2020-01-01"]))
+        dta = pd.core.arrays.DatetimeArray._simple_new(arr, dtype=arr.dtype)
+        expected = constructor(dta)
+        assert expected.dtype == arr.dtype
+
         result = constructor(arr)
         tm.assert_equal(result, expected)
 

diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
@@ -44,6 +44,7 @@ def test_from_records_with_datetimes(self):
         dtypes = [("EXPIRY", "<M8[m]")]
         recarray = np.core.records.fromarrays(arrdata, dtype=dtypes)
         result = DataFrame.from_records(recarray)
+        expected["EXPIRY"] = expected["EXPIRY"].astype("M8[m]")
         tm.assert_frame_equal(result, expected)
 
     def test_from_records_sequencelike(self):

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -277,11 +277,11 @@ def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
         expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]")
         tm.assert_frame_equal(result, expected)
 
-        # OutOfBoundsDatetime error shouldn't occur
+        # OutOfBoundsDatetime error shouldn't occur; as of 2.0 we preserve "M8[s]"
         data_s = np.array([1, "nat"], dtype="datetime64[s]")
         result["new"] = data_s
-        expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]")
-        tm.assert_frame_equal(result, expected)
+        tm.assert_series_equal(result[0], expected[0])
+        tm.assert_numpy_array_equal(result["new"].to_numpy(), data_s)
 
     @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
     def test_frame_setitem_datetime64_col_other_units(self, unit):
@@ -291,12 +291,17 @@ def test_frame_setitem_datetime64_col_other_units(self, unit):
 
         dtype = np.dtype(f"M8[{unit}]")
         vals = np.arange(n, dtype=np.int64).view(dtype)
-        ex_vals = vals.astype("datetime64[ns]")
+        if unit in ["s", "ms"]:
+            # supported unit
+            ex_vals = vals
+        else:
+            # unsupported unit: we get the nearest supported unit, i.e. "s"
+            ex_vals = vals.astype("datetime64[s]")
 
         df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
         df[unit] = vals
 
-        assert df[unit].dtype == np.dtype("M8[ns]")
+        assert df[unit].dtype == ex_vals.dtype
         assert (df[unit].values == ex_vals).all()
 
     @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])

diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
@@ -424,9 +424,8 @@ def test_astype_to_datetime_unit(self, unit):
             # GH#48928
             exp_dtype = dtype
         else:
-            # TODO(2.0): use the nearest supported dtype (i.e. M8[s]) instead
-            #  of nanos
-            exp_dtype = "M8[ns]"
+            # we use the nearest supported dtype (i.e. M8[s])
+            exp_dtype = "M8[s]"
         # TODO(2.0): once DataFrame constructor doesn't cast ndarray inputs.
         #  can simplify this
         exp_values = arr.astype(exp_dtype)

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -228,11 +228,13 @@ def test_construction_with_conversions(self):
         expected = DataFrame(
             {
                 "dt1": Timestamp("20130101"),
-                "dt2": date_range("20130101", periods=3),
+                "dt2": date_range("20130101", periods=3).astype("M8[s]"),
                 # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'),
             },
             index=range(3),
         )
+        assert expected.dtypes["dt1"] == "M8[ns]"
+        assert expected.dtypes["dt2"] == "M8[s]"
 
         df = DataFrame(index=range(3))
         df["dt1"] = np.datetime64("2013-01-01")

diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -913,9 +913,9 @@ def test_constructor_no_precision_raises(self):
             Index(["2000"], dtype="datetime64")
 
     def test_constructor_wrong_precision_raises(self):
-        msg = "Unexpected value for 'dtype': 'datetime64\\[us\\]'"
-        with pytest.raises(ValueError, match=msg):
-            DatetimeIndex(["2000"], dtype="datetime64[us]")
+        dti = DatetimeIndex(["2000"], dtype="datetime64[us]")
+        assert dti.dtype == "M8[us]"
+        assert dti[0] == Timestamp(2000, 1, 1)
 
     def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
         # GH 27011

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -731,18 +731,28 @@ def test_other_datetime_unit(self, unit):
 
         dtype = f"datetime64[{unit}]"
         df2 = ser.astype(dtype).to_frame("days")
-        # coerces to datetime64[ns], thus should not be affected
-        assert df2["days"].dtype == "datetime64[ns]"
+
+        if unit in ["D", "h", "m"]:
+            # "D", "h" and "m" are not supported resolutions, so the values
+            # are cast to the nearest supported unit (seconds) rather than
+            # being coerced to datetime64[ns]
+            exp_dtype = "datetime64[s]"
+        else:
+            exp_dtype = dtype
+        assert df2["days"].dtype == exp_dtype
 
         result = df1.merge(df2, left_on="entity_id", right_index=True)
 
+        days = np.array(["nat", "nat"], dtype=exp_dtype)
+        days = pd.core.arrays.DatetimeArray._simple_new(days, dtype=days.dtype)
         exp = DataFrame(
             {
                 "entity_id": [101, 102],
-                "days": np.array(["nat", "nat"], dtype="datetime64[ns]"),
+                "days": days,
             },
             columns=["entity_id", "days"],
         )
+        assert exp["days"].dtype == exp_dtype
         tm.assert_frame_equal(result, exp)
 
     @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"])