From eab103007fdc79b8eacd93aca4be221cf5524724 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Sep 2020 15:15:36 -0700 Subject: [PATCH 01/11] EA: tighten TimedeltaArray._from_sequence signature --- pandas/core/arrays/timedeltas.py | 20 ++++++++++++++++++-- pandas/core/indexes/timedeltas.py | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 145380ecce9fd..d04671c1852dd 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -210,8 +210,24 @@ def _simple_new(cls, values, freq=None, dtype=TD64NS_DTYPE): return result @classmethod - def _from_sequence( - cls, data, dtype=TD64NS_DTYPE, copy=False, freq=lib.no_default, unit=None + def _from_sequence(cls, data, dtype=TD64NS_DTYPE, copy: bool = False): + if dtype: + _validate_td64_dtype(dtype) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) + freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) + + result = cls._simple_new(data, freq=freq) + return result + + @classmethod + def _from_sequence_not_strict( + cls, + data, + dtype=TD64NS_DTYPE, + copy: bool = False, + freq=lib.no_default, + unit=None, ): if dtype: _validate_td64_dtype(dtype) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 20ebc80c7e0af..b3043b0aa8173 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -153,7 +153,7 @@ def __new__( # - Cases checked above all return/raise before reaching here - # - tdarr = TimedeltaArray._from_sequence( + tdarr = TimedeltaArray._from_sequence_not_strict( data, freq=freq, unit=unit, dtype=dtype, copy=copy ) return cls._simple_new(tdarr, name=name) From e54ed75dde395385664560794f8df6edfd5b0735 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 28 Sep 2020 18:19:56 -0700 Subject: [PATCH 02/11] EA: Tighten signature on DatetimeArray._from_sequence --- pandas/core/arrays/datetimes.py | 6 ++++- pandas/core/indexes/datetimes.py | 2 +- pandas/core/nanops.py | 4 ++- pandas/tests/arrays/test_array.py | 4 ++- pandas/tests/arrays/test_datetimes.py | 27 +++++++++++++------ pandas/tests/extension/test_datetime.py | 4 ++- .../indexes/datetimes/test_constructors.py | 4 ++- pandas/tests/scalar/test_nat.py | 5 +++- 8 files changed, 41 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cd5449058fb33..db73c84b39cf9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -299,7 +299,11 @@ def _simple_new( return result @classmethod - def _from_sequence( + def _from_sequence(cls, scalars, dtype=None, copy: bool = False): + return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_sequence_not_strict( cls, data, dtype=None, diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index da78f8ff5d603..06405995f7685 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -295,7 +295,7 @@ def __new__( name = maybe_extract_name(name, data, cls) - dtarr = DatetimeArray._from_sequence( + dtarr = DatetimeArray._from_sequence_not_strict( data, dtype=dtype, copy=copy, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 64470da2fb910..f2354f649b1e3 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1616,7 +1616,9 @@ def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike: result = result.view(orig_dtype) else: # DatetimeArray - result = type(values)._from_sequence(result, dtype=orig_dtype) + result = type(values)._simple_new( # type: ignore[attr-defined] + result, dtype=orig_dtype + ) elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): vals = values.copy() diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index ff2573a51c3e7..72deada4eaf43 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -210,7 +210,9 @@ def test_array_copy(): datetime.datetime(2000, 1, 1, tzinfo=cet), datetime.datetime(2001, 1, 1, tzinfo=cet), ], - DatetimeArray._from_sequence(["2000", "2001"], tz=cet), + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet) + ), ), # timedelta ( diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 53f26de09f94e..e7605125e7420 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -71,7 +71,7 @@ def test_mixing_naive_tzaware_raises(self, meth): def test_from_pandas_array(self): arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 - result = DatetimeArray._from_sequence(arr, freq="infer") + result = DatetimeArray._from_sequence(arr)._with_freq("infer") expected = pd.date_range("1970-01-01", periods=5, freq="H")._data tm.assert_datetime_array_equal(result, expected) @@ -162,7 +162,9 @@ def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): class TestDatetimeArray: def test_astype_to_same(self): - arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr @@ -193,7 +195,9 @@ def test_astype_int(self, dtype): tm.assert_numpy_array_equal(result, expected) def test_tz_setter_raises(self): - arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) with pytest.raises(AttributeError, match="tz_localize"): arr.tz = "UTC" @@ -282,7 +286,8 @@ def test_fillna_preserves_tz(self, method): fill_val = dti[1] if method == "pad" else dti[3] expected = DatetimeArray._from_sequence( - [dti[0], dti[1], fill_val, dti[3], dti[4]], freq=None, tz="US/Central" + [dti[0], dti[1], fill_val, dti[3], dti[4]], + dtype=DatetimeTZDtype(tz="US/Central"), ) result = arr.fillna(method=method) @@ -434,12 +439,16 @@ def test_shift_value_tzawareness_mismatch(self): class TestSequenceToDT64NS: def test_tz_dtype_mismatch_raises(self): - arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) with pytest.raises(TypeError, match="data is already tz-aware"): sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) def test_tz_dtype_matches(self): - arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + arr = DatetimeArray._from_sequence( + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + ) result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) tm.assert_numpy_array_equal(arr._data, result) @@ -447,6 +456,7 @@ def test_tz_dtype_matches(self): class TestReductions: @pytest.mark.parametrize("tz", [None, "US/Central"]) def test_min_max(self, tz): + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") arr = DatetimeArray._from_sequence( [ "2000-01-03", @@ -456,7 +466,7 @@ def test_min_max(self, tz): "2000-01-05", "2000-01-04", ], - tz=tz, + dtype=dtype, ) result = arr.min() @@ -476,7 +486,8 @@ def test_min_max(self, tz): @pytest.mark.parametrize("tz", [None, "US/Central"]) @pytest.mark.parametrize("skipna", [True, False]) def test_min_max_empty(self, skipna, tz): - arr = DatetimeArray._from_sequence([], tz=tz) + dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") + arr = DatetimeArray._from_sequence([], dtype=dtype) result = arr.min(skipna=skipna) assert result is pd.NaT diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index e026809f7e611..0fde1e8a2fdb8 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -181,8 +181,10 @@ def test_concat_mixed_dtypes(self, data): @pytest.mark.parametrize("obj", ["series", "frame"]) def test_unstack(self, obj): # GH-13287: can't use base test, since building the expected fails. + dtype = DatetimeTZDtype(tz="US/Central") data = DatetimeArray._from_sequence( - ["2000", "2001", "2002", "2003"], tz="US/Central" + ["2000", "2001", "2002", "2003"], + dtype=dtype, ) index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 9a855a1624520..d3c79f231449a 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -16,7 +16,9 @@ class TestDatetimeIndex: - @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence]) + @pytest.mark.parametrize( + "dt_cls", [DatetimeIndex, DatetimeArray._from_sequence_not_strict] + ) def test_freq_validation_with_nat(self, dt_cls): # GH#11587 make sure we get a useful error message when generate_range # raises diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 09d5d9c1677d0..2ea7602b00206 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -12,6 +12,7 @@ from pandas import ( DatetimeIndex, + DatetimeTZDtype, Index, NaT, Period, @@ -440,7 +441,9 @@ def test_nat_rfloordiv_timedelta(val, expected): DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), - DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], tz="US/Pacific"), + DatetimeArray._from_sequence( + ["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific") + ), TimedeltaIndex(["1 day", "2 day"], name="x"), ], ) From 77e7c2150975c55d8616ab66355e57849b837798 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Oct 2020 15:54:53 -0700 Subject: [PATCH 03/11] API: restrict DTA/TDA _from_sequence --- pandas/core/arrays/datetimelike.py | 18 +++++++++++ pandas/core/arrays/datetimes.py | 35 +++++++++++++++++++++- pandas/core/arrays/timedeltas.py | 16 ++++++++++ pandas/tests/arithmetic/test_period.py | 2 +- pandas/tests/arrays/test_array.py | 34 ++++++++++----------- pandas/tests/arrays/test_datetimes.py | 26 ++++++++-------- pandas/tests/arrays/test_timedeltas.py | 4 ++- pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/frame/test_constructors.py | 1 + 9 files changed, 103 insertions(+), 35 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index cc2c753857032..76e6acf601221 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1584,6 +1584,24 @@ def _with_freq(self, freq): # Shared Constructor Helpers +def ensure_arraylike(scalars, copy: bool) -> Tuple[Any, bool]: + """ + Convert non-arraylike scalar sequences to ndarray. + """ + if not hasattr(scalars, "dtype"): + copy = False + if np.ndim(scalars) == 0: + scalars = list(scalars) + + scalars = np.asarray(scalars) + if len(scalars) == 0: + # Without casting, we would have float64 and so would reject later + # in from_sequence + scalars = scalars.astype(object) + + return scalars, copy + + def validate_periods(periods): """ If a `periods` argument is passed to the Datetime/Timedelta Array/Index diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b1b8b513320e9..be678e1bf998b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -43,7 +43,12 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndexClass, + ABCMultiIndex, + ABCPandasArray, + ABCSeries, +) from pandas.core.dtypes.missing import isna from pandas.core.algorithms import checked_add_with_arr @@ -300,6 +305,34 @@ def _simple_new( @classmethod def _from_sequence(cls, scalars, dtype=None, copy: bool = False): + + scalars, copy = dtl.ensure_arraylike(scalars, copy) + + if scalars.dtype.kind == "M": + pass + elif scalars.dtype == object: + if isinstance(scalars, ABCMultiIndex): + raise TypeError("Cannot create a DatetimeArray from MultiIndex") + + inferred = lib.infer_dtype(scalars) + if inferred in ["datetime64", "date", "datetime", "empty"]: + pass + else: + msg = f"dtype {scalars.dtype} cannot be converted to datetime64[ns]" + raise TypeError(msg) + elif is_string_dtype(scalars.dtype): + # TODO: should go through from_sequence_of_strings? + pass + elif ( + is_categorical_dtype(scalars.dtype) and scalars.categories.dtype.kind == "M" + ): + # TODO: Could also use Categorical[object] + # with inferred_type as above? + pass + else: + msg = f"dtype {scalars.dtype} cannot be converted to datetime64[ns]" + raise TypeError(msg) + return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) @classmethod diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 82cd54182a33d..650e72fe27230 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -224,6 +224,22 @@ def _from_sequence( if dtype: _validate_td64_dtype(dtype) + data, copy = dtl.ensure_arraylike(data, copy) + + if data.dtype.kind == "m": + pass + elif data.dtype == object: + inferred = lib.infer_dtype(data) + if inferred in ["timedelta64", "timedelta", "empty"]: + pass + else: + raise TypeError(inferred) + elif is_string_dtype(data.dtype): + # TODO: should go through from_sequence_of_strings? + pass + else: + raise TypeError(data.dtype) + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index e78e696d00398..f3a844beb9273 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1191,7 +1191,7 @@ def test_parr_add_sub_td64_nat(self, box_with_array, transpose): "other", [ np.array(["NaT"] * 9, dtype="m8[ns]"), - TimedeltaArray._from_sequence(["NaT"] * 9), + TimedeltaArray._from_sequence([np.timedelta64("NaT", "ns")] * 9), ], ) def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 72deada4eaf43..92ebf3ee0e5f5 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -48,7 +48,7 @@ ( [pd.Period("2000", "D"), pd.Period("2001", "D")], "Period[D]", - period_array(["2000", "2001"], freq="D"), + period_array([pd.Timestamp("2000"), pd.Timestamp("2001")], freq="D"), ), # Period dtype ( @@ -57,11 +57,6 @@ period_array(["2000"], freq="D"), ), # Datetime (naive) - ( - [1, 2], - np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), - ), ( np.array([1, 2], dtype="datetime64[ns]"), None, @@ -70,41 +65,42 @@ ( pd.DatetimeIndex(["2000", "2001"]), np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), ), ( pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), ), ( ["2000", "2001"], np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), ), # Datetime (tz-aware) ( ["2000", "2001"], pd.DatetimeTZDtype(tz="CET"), DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + [pd.Timestamp("2000"), pd.Timestamp("2001")], + dtype=pd.DatetimeTZDtype(tz="CET"), ), ), # Timedelta ( ["1H", "2H"], np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence(["1H", "2H"]), + TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), ), ( pd.TimedeltaIndex(["1H", "2H"]), np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence(["1H", "2H"]), + TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), ), ( pd.TimedeltaIndex(["1H", "2H"]), None, - TimedeltaArray._from_sequence(["1H", "2H"]), + TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), ), # Category (["a", "b"], "category", pd.Categorical(["a", "b"])), @@ -184,11 +180,11 @@ def test_array_copy(): # datetime ( [pd.Timestamp("2000"), pd.Timestamp("2001")], - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], - DatetimeArray._from_sequence(["2000", "2001"]), + DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), ), ( np.array([1, 2], dtype="M8[ns]"), @@ -202,7 +198,8 @@ def test_array_copy(): ( [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + [pd.Timestamp("2000"), pd.Timestamp("2001")], + dtype=pd.DatetimeTZDtype(tz="CET"), ), ), ( @@ -211,13 +208,14 @@ def test_array_copy(): datetime.datetime(2001, 1, 1, tzinfo=cet), ], DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet) + [pd.Timestamp("2000"), pd.Timestamp("2001")], + dtype=pd.DatetimeTZDtype(tz=cet), ), ), # timedelta ( [pd.Timedelta("1H"), pd.Timedelta("2H")], - TimedeltaArray._from_sequence(["1H", "2H"]), + TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), ), ( np.array([1, 2], dtype="m8[ns]"), diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 9f136b4979bb7..8578aced17526 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -71,10 +71,10 @@ def test_mixing_naive_tzaware_raises(self, meth): def test_from_pandas_array(self): arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 - result = DatetimeArray._from_sequence(arr)._with_freq("infer") + result = pd.DatetimeIndex(arr, freq="infer") - expected = pd.date_range("1970-01-01", periods=5, freq="H")._data - tm.assert_datetime_array_equal(result, expected) + expected = pd.date_range("1970-01-01", periods=5, freq="H") + tm.assert_index_equal(result, expected) def test_mismatched_timezone_raises(self): arr = DatetimeArray( @@ -163,7 +163,7 @@ def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): class TestDatetimeArray: def test_astype_to_same(self): arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") ) result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr @@ -196,7 +196,7 @@ def test_astype_int(self, dtype): def test_tz_setter_raises(self): arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") ) with pytest.raises(AttributeError, match="tz_localize"): arr.tz = "UTC" @@ -440,14 +440,14 @@ def test_shift_value_tzawareness_mismatch(self): class TestSequenceToDT64NS: def test_tz_dtype_mismatch_raises(self): arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") ) with pytest.raises(TypeError, match="data is already tz-aware"): sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) def test_tz_dtype_matches(self): arr = DatetimeArray._from_sequence( - ["2000"], dtype=DatetimeTZDtype(tz="US/Central") + [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") ) result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) tm.assert_numpy_array_equal(arr._data, result) @@ -460,12 +460,12 @@ def arr1d(self, tz_naive_fixture): dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") arr = DatetimeArray._from_sequence( [ - "2000-01-03", - "2000-01-03", - "NaT", - "2000-01-02", - "2000-01-05", - "2000-01-04", + pd.Timestamp("2000-01-03"), + pd.Timestamp("2000-01-03"), + pd.NaT, + pd.Timestamp("2000-01-02"), + pd.Timestamp("2000-01-05"), + pd.Timestamp("2000-01-04"), ], dtype=dtype, ) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index b3b8f4d55e4de..e9d5c7dd3a8af 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -188,7 +188,9 @@ def test_reductions_empty(self, name, skipna): assert result is pd.NaT def test_min_max(self): - arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) + vals = ["3H", "3H", "NaT", "2H", "5H", "4H"] + vals = [pd.Timedelta(x) for x in vals] + arr = TimedeltaArray._from_sequence(vals) result = arr.min() expected = pd.Timedelta("2H") diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 7d03dadb20dd9..dc908ec1fd543 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -199,7 +199,7 @@ def test_cast_category_to_extension_dtype(self, expected): ) def test_consistent_casting(self, dtype, expected): # GH 28448 - result = pd.Categorical("2015-01-01").astype(dtype) + result = pd.Categorical(pd.Timestamp("2015-01-01")).astype(dtype) assert result == expected diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8ec11d14cd606..e48068e1ea55e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2804,6 +2804,7 @@ def test_from_tzaware_mixed_object_array(self): ] assert (res.dtypes == expected_dtypes).all() + @pytest.mark.xfail(reason="DatetimeArray._from_sequence no longer accepts i8") def test_from_2d_ndarray_with_dtype(self): # GH#12513 array_dim2 = np.arange(10).reshape((5, 2)) From f2a2aafbd8f8589cd859b69387cfdc16baa5a088 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Oct 2020 18:58:31 -0700 Subject: [PATCH 04/11] test --- pandas/core/arrays/datetimes.py | 2 +- pandas/tests/arrays/test_datetimes.py | 5 +++++ pandas/tests/arrays/test_timedeltas.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 89322011ee450..1f2828647f9e6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -318,7 +318,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False): if inferred in ["datetime64", "date", "datetime", "empty"]: pass else: - msg = f"dtype {scalars.dtype} cannot be converted to datetime64[ns]" + msg = f"{inferred} scalars cannot be converted to datetime64[ns]" raise TypeError(msg) elif is_string_dtype(scalars.dtype): # TODO: should go through from_sequence_of_strings? diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index e272ae984e422..30c3bf1d20999 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -20,6 +20,11 @@ def test_from_sequence_invalid_type(self): with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): DatetimeArray._from_sequence(mi) + msg = "mixed scalars cannot be converted to datetime64" + with pytest.raises(TypeError, match=msg): + # GH#37179 + DatetimeArray._from_sequence(mi._values) + def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 495fe382fb14d..8c56de998ac07 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -59,6 +59,16 @@ def test_copy(self): assert arr._data is not data assert arr._data.base is not data + def test_from_sequence_invalid_dtypes(self): + # GH#37179 + data = np.arange(5, dtype=np.float64) + with pytest.raises(TypeError, match="float64"): + TimedeltaArray._from_sequence(data) + + with pytest.raises(TypeError, match="floating"): + # object-dtype array of floats + TimedeltaArray._from_sequence(data.astype(object)) + class TestTimedeltaArray: # TODO: de-duplicate with test_npsum below From bc532be3814504e2a9aed1b770616391da68e7fd Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Oct 2020 18:21:08 -0700 Subject: [PATCH 05/11] lint fixup --- pandas/tests/arrays/test_timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 4af7613879ac0..0070b1ea4a972 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -214,7 +214,7 @@ def test_sum_empty(self, skipna): def test_min_max(self): vals = ["3H", "3H", "NaT", "2H", "5H", "4H"] - vals = [pd.Timedelta(x) for x in vals] + vals = [Timedelta(x) for x in vals] arr = TimedeltaArray._from_sequence(vals) result = arr.min() From 8f1b25de629ba86f0b289272c0a22db4d28a8e43 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Nov 2020 16:10:42 -0800 Subject: [PATCH 06/11] Use _from_sequence_of_strings where appropriate --- pandas/core/arrays/datetimelike.py | 8 ++++++++ pandas/core/dtypes/cast.py | 5 ++++- pandas/tests/extension/json/array.py | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 156f6dde179d5..5ffc8f60fbf91 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1525,6 +1525,14 @@ class TimelikeOps(DatetimeLikeArrayMixin): Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. """ + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + # Note: for now at least, caller is responsible for checking that + # strings are being passed. + return cls._from_sequence_not_strict(strings, dtype=dtype, copy=copy) + + # -------------------------------------------------------------- + def _round(self, freq, mode, ambiguous, nonexistent): # round the local times if is_datetime64tz_dtype(self.dtype): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index aded0af6aca0e..3d750ba7db141 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1000,7 +1000,10 @@ def astype_nansafe( """ # dispatch on extension dtype if needed if is_extension_array_dtype(dtype): - return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) + cls = dtype.construct_array_type() + if lib.infer_dtype(arr) == "string": + return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy) + return cls._from_sequence(arr, dtype=dtype, copy=copy) if not isinstance(dtype, np.dtype): dtype = pandas_dtype(dtype) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index e3cdeb9c1951f..45e58a16947ca 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -172,7 +172,8 @@ def astype(self, dtype, copy=True): return self elif isinstance(dtype, StringDtype): value = self.astype(str) # numpy doesn'y like nested dicts - return dtype.construct_array_type()._from_sequence(value, copy=False) + cls = dtype.construct_array_type() + return cls._from_sequence_of_strings(value, copy=False) return np.array([dict(x) for x in self], dtype=dtype, copy=copy) From 9f72ad80187d1732c552f9b72640b89bd1564914 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 5 Nov 2020 10:50:21 -0800 Subject: [PATCH 07/11] workaround for i8 case --- pandas/core/dtypes/cast.py | 3 +++ pandas/tests/frame/test_constructors.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b503d90d9e74a..68eb8a7492978 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1044,6 +1044,9 @@ def astype_nansafe( cls = dtype.construct_array_type() if lib.infer_dtype(arr) == "string": return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy) + if is_datetime64tz_dtype(dtype): + # GH#37179 workaround until we have a more robust solution + return cls._from_sequence_not_strict(arr, dtype=dtype, copy=copy) return cls._from_sequence(arr, dtype=dtype, copy=copy) if not isinstance(dtype, np.dtype): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index edc8e1fc9f695..408024e48a35a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2898,7 +2898,6 @@ def test_from_tzaware_mixed_object_array(self): ] assert (res.dtypes == expected_dtypes).all() - @pytest.mark.xfail(reason="DatetimeArray._from_sequence no longer accepts i8") def test_from_2d_ndarray_with_dtype(self): # GH#12513 array_dim2 = np.arange(10).reshape((5, 2)) From 54c87da6ad46a0b2f58b2b63b15ad30c6319ac35 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 5 Nov 2020 12:17:39 -0800 Subject: [PATCH 08/11] mypy fixup --- pandas/core/dtypes/cast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 68eb8a7492978..609d50b1d5e58 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -16,6 +16,7 @@ Tuple, Type, Union, + cast, ) import numpy as np @@ -97,7 +98,7 @@ if TYPE_CHECKING: from pandas import Series - from pandas.core.arrays import ExtensionArray + from pandas.core.arrays import DatetimeArray, ExtensionArray from pandas.core.indexes.base import Index from pandas.core.indexes.datetimes import DatetimeIndex @@ -1046,6 +1047,7 @@ def astype_nansafe( return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy) if is_datetime64tz_dtype(dtype): # GH#37179 workaround until we have a more robust solution + cls = cast(Type["DatetimeArray"], cls) return cls._from_sequence_not_strict(arr, dtype=dtype, copy=copy) return cls._from_sequence(arr, dtype=dtype, copy=copy) From bffacb1f4a337cc88ca6a32f3b1b62b789e3749b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 13:47:03 -0800 Subject: [PATCH 09/11] from_sequence -> from_sequence_strict --- pandas/core/arrays/datetimes.py | 7 ++++++- pandas/core/arrays/timedeltas.py | 12 +++++++++++- pandas/core/dtypes/cast.py | 7 +------ pandas/tests/arrays/test_datetimes.py | 4 ++-- pandas/tests/arrays/test_timedeltas.py | 6 +++--- 5 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 289275ccb7754..531a808dc94b4 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -306,7 +306,8 @@ def _simple_new( return result @classmethod - def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): + def _from_sequence_strict(cls, scalars, *, dtype=None, copy: bool = False): + # GH#37179 eventually _from_sequence should be strict scalars, copy = dtl.ensure_arraylike(scalars, copy) @@ -337,6 +338,10 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) + @classmethod + def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): + return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) + @classmethod def _from_sequence_not_strict( cls, diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 7ccd3d2be8fd6..bd82b4be6c726 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -218,9 +218,10 @@ def _simple_new( return result @classmethod - def _from_sequence( + def _from_sequence_strict( cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False ) -> "TimedeltaArray": + # GH#37179 eventually we want _from_sequence to be strict if dtype: _validate_td64_dtype(dtype) @@ -240,6 +241,15 @@ def _from_sequence( else: raise TypeError(data.dtype) + return cls._from_sequence(data=data, copy=copy) + + @classmethod + def _from_sequence( + cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False + ) -> "TimedeltaArray": + if dtype: + _validate_td64_dtype(dtype) + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 609d50b1d5e58..b503d90d9e74a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -16,7 +16,6 @@ Tuple, Type, Union, - cast, ) import numpy as np @@ -98,7 +97,7 @@ if TYPE_CHECKING: from pandas import Series - from pandas.core.arrays import DatetimeArray, ExtensionArray + from pandas.core.arrays import ExtensionArray from pandas.core.indexes.base import Index from pandas.core.indexes.datetimes import DatetimeIndex @@ -1045,10 +1044,6 @@ def astype_nansafe( cls = dtype.construct_array_type() if lib.infer_dtype(arr) == "string": return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy) - if is_datetime64tz_dtype(dtype): - # GH#37179 workaround until we have a more robust solution - cls = cast(Type["DatetimeArray"], cls) - return cls._from_sequence_not_strict(arr, dtype=dtype, copy=copy) return cls._from_sequence(arr, dtype=dtype, copy=copy) if not isinstance(dtype, np.dtype): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 39baf92dda5fa..6393fe85d7c9f 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -16,7 +16,7 @@ class TestDatetimeArrayConstructor: - def test_from_sequence_invalid_type(self): + def test_from_sequence_strict_invalid_type(self): mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): DatetimeArray._from_sequence(mi) @@ -24,7 +24,7 @@ def test_from_sequence_invalid_type(self): msg = "mixed scalars cannot be converted to datetime64" with pytest.raises(TypeError, match=msg): # GH#37179 - DatetimeArray._from_sequence(mi._values) + DatetimeArray._from_sequence_strict(mi._values) def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 0070b1ea4a972..46d1df829af4b 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -61,15 +61,15 @@ def test_copy(self): assert arr._data is not data assert arr._data.base is not data - def test_from_sequence_invalid_dtypes(self): + def test_from_sequence_strict_invalid_dtypes(self): # GH#37179 data = np.arange(5, dtype=np.float64) with pytest.raises(TypeError, match="float64"): - TimedeltaArray._from_sequence(data) + TimedeltaArray._from_sequence_strict(data) with pytest.raises(TypeError, match="floating"): # object-dtype array of floats - TimedeltaArray._from_sequence(data.astype(object)) + TimedeltaArray._from_sequence_strict(data.astype(object)) class TestTimedeltaArray: From 5acede84e3b012c97668394af2521d5f29fd1752 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 19 Nov 2020 18:02:21 -0800 Subject: [PATCH 10/11] mypy fixup --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6539c556d2e62..c3dce88ebc2ed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1531,7 +1531,7 @@ class TimelikeOps(DatetimeLikeArrayMixin): def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): # Note: for now at least, caller is responsible for checking that # strings are being passed. - return cls._from_sequence_not_strict(strings, dtype=dtype, copy=copy) + return cls._from_sequence(strings, dtype=dtype, copy=copy) # -------------------------------------------------------------- From 8c877602ce1e8c2e00165d88aeba341f830319f1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 20 Nov 2020 12:49:06 -0800 Subject: [PATCH 11/11] revert test edits --- pandas/core/arrays/timedeltas.py | 5 +--- pandas/tests/arithmetic/test_period.py | 2 +- pandas/tests/arrays/test_array.py | 34 ++++++++++++---------- pandas/tests/arrays/test_datetimes.py | 33 ++++++++++++--------- pandas/tests/arrays/test_timedeltas.py | 6 ++-- pandas/tests/extension/test_categorical.py | 2 +- 6 files changed, 42 insertions(+), 40 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bd82b4be6c726..888bcc93cf497 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -234,10 +234,7 @@ def _from_sequence_strict( if inferred in ["timedelta64", "timedelta", "empty"]: pass else: - raise TypeError(inferred) - elif is_string_dtype(data.dtype): - # TODO: should go through from_sequence_of_strings? - pass + raise ValueError(inferred) else: raise TypeError(data.dtype) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 8c4f258e71bb7..690d10054f4c4 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1187,7 +1187,7 @@ def test_parr_add_sub_td64_nat(self, box_with_array, transpose): "other", [ np.array(["NaT"] * 9, dtype="m8[ns]"), - TimedeltaArray._from_sequence([np.timedelta64("NaT", "ns")] * 9), + TimedeltaArray._from_sequence(["NaT"] * 9), ], ) def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 92ebf3ee0e5f5..72deada4eaf43 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -48,7 +48,7 @@ ( [pd.Period("2000", "D"), pd.Period("2001", "D")], "Period[D]", - period_array([pd.Timestamp("2000"), pd.Timestamp("2001")], freq="D"), + period_array(["2000", "2001"], freq="D"), ), # Period dtype ( @@ -57,6 +57,11 @@ period_array(["2000"], freq="D"), ), # Datetime (naive) + ( + [1, 2], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), ( np.array([1, 2], dtype="datetime64[ns]"), None, @@ -65,42 +70,41 @@ ( pd.DatetimeIndex(["2000", "2001"]), np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), + DatetimeArray._from_sequence(["2000", "2001"]), ), ( pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), + DatetimeArray._from_sequence(["2000", "2001"]), ), ( ["2000", "2001"], np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), + DatetimeArray._from_sequence(["2000", "2001"]), ), # Datetime (tz-aware) ( ["2000", "2001"], pd.DatetimeTZDtype(tz="CET"), DatetimeArray._from_sequence( - [pd.Timestamp("2000"), pd.Timestamp("2001")], - dtype=pd.DatetimeTZDtype(tz="CET"), + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") ), ), # Timedelta ( ["1H", "2H"], np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), + TimedeltaArray._from_sequence(["1H", "2H"]), ), ( pd.TimedeltaIndex(["1H", "2H"]), np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), + TimedeltaArray._from_sequence(["1H", "2H"]), ), ( pd.TimedeltaIndex(["1H", "2H"]), None, - TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), + TimedeltaArray._from_sequence(["1H", "2H"]), ), # Category (["a", "b"], "category", pd.Categorical(["a", "b"])), @@ -180,11 +184,11 @@ def test_array_copy(): # datetime ( [pd.Timestamp("2000"), pd.Timestamp("2001")], - DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), + DatetimeArray._from_sequence(["2000", "2001"]), ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], - DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]), + DatetimeArray._from_sequence(["2000", "2001"]), ), ( np.array([1, 2], dtype="M8[ns]"), @@ -198,8 +202,7 @@ def test_array_copy(): ( [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], DatetimeArray._from_sequence( - [pd.Timestamp("2000"), pd.Timestamp("2001")], - dtype=pd.DatetimeTZDtype(tz="CET"), + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") ), ), ( @@ -208,14 +211,13 @@ def test_array_copy(): datetime.datetime(2001, 1, 1, tzinfo=cet), ], DatetimeArray._from_sequence( - [pd.Timestamp("2000"), pd.Timestamp("2001")], - dtype=pd.DatetimeTZDtype(tz=cet), + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet) ), ), # timedelta ( [pd.Timedelta("1H"), pd.Timedelta("2H")], - TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]), + TimedeltaArray._from_sequence(["1H", "2H"]), ), ( np.array([1, 2], dtype="m8[ns]"), diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 6393fe85d7c9f..fb1a33409bd18 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -19,13 +19,18 @@ class TestDatetimeArrayConstructor: def test_from_sequence_strict_invalid_type(self): mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): - DatetimeArray._from_sequence(mi) + DatetimeArray._from_sequence_strict(mi) msg = "mixed scalars cannot be converted to datetime64" with pytest.raises(TypeError, match=msg): # GH#37179 DatetimeArray._from_sequence_strict(mi._values) + def test_from_sequence_invalid_type(self): + mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) + with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): + DatetimeArray._from_sequence(mi) + def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") @@ -77,10 +82,10 @@ def test_mixing_naive_tzaware_raises(self, meth): def test_from_pandas_array(self): arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 - result = pd.DatetimeIndex(arr, freq="infer") + result = DatetimeArray._from_sequence(arr)._with_freq("infer") - expected = pd.date_range("1970-01-01", periods=5, freq="H") - tm.assert_index_equal(result, expected) + expected = pd.date_range("1970-01-01", periods=5, freq="H")._data + tm.assert_datetime_array_equal(result, expected) def test_mismatched_timezone_raises(self): arr = DatetimeArray( @@ -169,7 +174,7 @@ def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): class TestDatetimeArray: def test_astype_to_same(self): arr = DatetimeArray._from_sequence( - [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") ) result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr @@ -202,7 +207,7 @@ def test_astype_int(self, dtype): def test_tz_setter_raises(self): arr = DatetimeArray._from_sequence( - [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") ) with pytest.raises(AttributeError, match="tz_localize"): arr.tz = "UTC" @@ -458,14 +463,14 @@ def test_shift_requires_tzmatch(self): class TestSequenceToDT64NS: def test_tz_dtype_mismatch_raises(self): arr = DatetimeArray._from_sequence( - [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") ) with pytest.raises(TypeError, match="data is already tz-aware"): sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) def test_tz_dtype_matches(self): arr = DatetimeArray._from_sequence( - [pd.Timestamp("2000")], dtype=DatetimeTZDtype(tz="US/Central") + ["2000"], dtype=DatetimeTZDtype(tz="US/Central") ) result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) tm.assert_numpy_array_equal(arr._data, result) @@ -478,12 +483,12 @@ def arr1d(self, tz_naive_fixture): dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]") arr = DatetimeArray._from_sequence( [ - pd.Timestamp("2000-01-03"), - pd.Timestamp("2000-01-03"), - pd.NaT, - pd.Timestamp("2000-01-02"), - pd.Timestamp("2000-01-05"), - pd.Timestamp("2000-01-04"), + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", ], dtype=dtype, ) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 46d1df829af4b..06d2a50dde581 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -67,7 +67,7 @@ def test_from_sequence_strict_invalid_dtypes(self): with pytest.raises(TypeError, match="float64"): TimedeltaArray._from_sequence_strict(data) - with pytest.raises(TypeError, match="floating"): + with pytest.raises(ValueError, match="floating"): # object-dtype array of floats TimedeltaArray._from_sequence_strict(data.astype(object)) @@ -213,9 +213,7 @@ def test_sum_empty(self, skipna): assert result == Timedelta(0) def test_min_max(self): - vals = ["3H", "3H", "NaT", "2H", "5H", "4H"] - vals = [Timedelta(x) for x in vals] - arr = TimedeltaArray._from_sequence(vals) + arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) result = arr.min() expected = Timedelta("2H") diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 23b296f702582..95f338cbc3240 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -199,7 +199,7 @@ def test_cast_category_to_extension_dtype(self, expected): ) def test_consistent_casting(self, dtype, expected): # GH 28448 - result = Categorical(Timestamp("2015-01-01")).astype(dtype) + result = Categorical("2015-01-01").astype(dtype) assert result == expected