diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e7b810dacdf57..4bc01b8f4ddb0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -326,6 +326,18 @@ def __new__( name = maybe_extract_name(name, data, cls) + if ( + isinstance(data, DatetimeArray) + and freq is lib.no_default + and tz is None + and dtype is None + ): + # fastpath, similar logic in TimedeltaIndex.__new__; + # Note in this particular case we retain non-nano. + if copy: + data = data.copy() + return cls._simple_new(data, name=name) + dtarr = DatetimeArray._from_sequence_not_strict( data, dtype=dtype, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 0249bf51f71b7..cdf09bbc3b78c 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -132,6 +132,7 @@ def __new__( "represent unambiguous timedelta values durations." ) + # FIXME: need to check for dtype/data match if isinstance(data, TimedeltaArray) and freq is lib.no_default: if copy: data = data.copy() diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7a5db56cb48fe..626809eab304e 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -326,7 +326,7 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarray(values, copy=copy_on_sanitize) + values = _prep_ndarraylike(values, copy=copy_on_sanitize) if dtype is not None and not is_dtype_equal(values.dtype, dtype): # GH#40110 see similar check inside sanitize_array @@ -341,7 +341,7 @@ def ndarray_to_mgr( allow_2d=True, ) - # _prep_ndarray ensures that values.ndim == 2 at this point + # _prep_ndarraylike ensures that values.ndim == 2 at this point index, columns = _get_axes( values.shape[0], values.shape[1], index=index, columns=columns ) @@ -537,15 +537,16 @@ def treat_as_nested(data) -> bool: # 
--------------------------------------------------------------------- -def _prep_ndarray(values, copy: bool = True) -> np.ndarray: +def _prep_ndarraylike( + values, copy: bool = True +) -> np.ndarray | DatetimeArray | TimedeltaArray: if isinstance(values, TimedeltaArray) or ( isinstance(values, DatetimeArray) and values.tz is None ): - # On older numpy, np.asarray below apparently does not call __array__, - # so nanoseconds get dropped. - values = values._ndarray + # By retaining DTA/TDA instead of unpacking, we end up retaining non-nano + pass - if not isinstance(values, (np.ndarray, ABCSeries, Index)): + elif not isinstance(values, (np.ndarray, ABCSeries, Index)): if len(values) == 0: return np.empty((0, 0), dtype=object) elif isinstance(values, range): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 0a67061016566..f06641002e039 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -52,6 +52,7 @@ IntervalArray, PeriodArray, SparseArray, + TimedeltaArray, ) from pandas.core.api import Int64Index @@ -2665,6 +2666,12 @@ def test_from_dict_with_missing_copy_false(self): ) tm.assert_frame_equal(df, expected) + def test_construction_empty_array_multi_column_raises(self): + # GH#46822 + msg = "Empty data passed with indices specified." + with pytest.raises(ValueError, match=msg): + DataFrame(data=np.array([]), columns=["a", "b"]) + class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): @@ -3086,8 +3093,50 @@ def test_tzaware_data_tznaive_dtype(self, constructor): assert np.all(result.dtypes == "M8[ns]") assert np.all(result == ts_naive) - def test_construction_empty_array_multi_column_raises(self): - # GH#46822 - msg = "Empty data passed with indices specified." - with pytest.raises(ValueError, match=msg): - DataFrame(data=np.array([]), columns=["a", "b"]) + +# TODO: better location for this test? 
class TestAllowNonNano:
    # Before 2.0, non-nanosecond dt64/td64 data is not preserved when it is
    #  handed over as a plain ndarray, but it is preserved when it is handed
    #  over already wrapped as a DTA/TDA.  These tests cover the wrapped case.

    @pytest.fixture(params=[True, False])
    def as_td(self, request):
        # True selects the timedelta64 flavor, False the datetime64 flavor.
        return request.param

    @pytest.fixture
    def arr(self, as_td):
        # Second-resolution values wrapped as a TimedeltaArray or a
        #  DatetimeArray, depending on ``as_td``.
        stamps = np.arange(5).astype(np.int64).view("M8[s]")
        if not as_td:
            return DatetimeArray._simple_new(stamps, dtype=stamps.dtype)

        # Subtracting the first element yields timedelta64 values that keep
        #  the same (second) resolution.
        deltas = stamps - stamps[0]
        return TimedeltaArray._simple_new(deltas, dtype=deltas.dtype)

    def test_index_allow_non_nano(self, arr):
        result = Index(arr)
        assert result.dtype == arr.dtype

    def test_dti_tdi_allow_non_nano(self, arr, as_td):
        # Pick the explicit index class that matches the array flavor.
        index_cls = pd.TimedeltaIndex if as_td else DatetimeIndex
        result = index_cls(arr)
        assert result.dtype == arr.dtype

    def test_series_allow_non_nano(self, arr):
        result = Series(arr)
        assert result.dtype == arr.dtype

    def test_frame_allow_non_nano(self, arr):
        frame = DataFrame(arr)
        assert frame.dtypes[0] == arr.dtype

    @pytest.mark.xfail(
        # TODO(2.0): xfail should become unnecessary
        reason="stack_arrays converts TDA to ndarray, then goes "
        "through ensure_wrapped_if_datetimelike",
        strict=False,
    )
    def test_frame_from_dict_allow_non_nano(self, arr):
        frame = DataFrame({0: arr})
        assert frame.dtypes[0] == arr.dtype