Patch summary (GH#37179): strict sequence constructors for datetime-like arrays.

* TimelikeOps gains _from_sequence_of_strings, which forwards to _from_sequence.
* New helper dtl.ensure_arraylike converts inputs lacking a .dtype to ndarray,
  casting empty input to object dtype.
* DatetimeArray and TimedeltaArray gain _from_sequence_strict, which rejects
  unsupported dtypes / inferred types before delegating to the existing
  non-strict constructors.
* astype_nansafe (when the input infers as "string") and the JSON test array
  (StringDtype branch) now call _from_sequence_of_strings.
* Tests pin the TypeError / ValueError raised for invalid inputs.

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 3b419f8d1da2a..c3dce88ebc2ed 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1527,6 +1527,14 @@ class TimelikeOps(DatetimeLikeArrayMixin):
     Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
     """
 
+    @classmethod
+    def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
+        # Note: for now at least, caller is responsible for checking that
+        # strings are being passed.
+        return cls._from_sequence(strings, dtype=dtype, copy=copy)
+
+    # --------------------------------------------------------------
+
     def _round(self, freq, mode, ambiguous, nonexistent):
         # round the local times
         if is_datetime64tz_dtype(self.dtype):
@@ -1595,6 +1603,24 @@ def _with_freq(self, freq):
 # Shared Constructor Helpers
 
 
+def ensure_arraylike(scalars, copy: bool) -> Tuple[Any, bool]:
+    """
+    Convert non-arraylike scalar sequences to ndarray.
+    """
+    if not hasattr(scalars, "dtype"):
+        copy = False
+        if np.ndim(scalars) == 0:
+            scalars = list(scalars)
+
+        scalars = np.asarray(scalars)
+        if len(scalars) == 0:
+            # Without casting, we would have float64 and so would reject later
+            # in from_sequence
+            scalars = scalars.astype(object)
+
+    return scalars, copy
+
+
 def validate_periods(periods):
     """
     If a `periods` argument is passed to the Datetime/Timedelta Array/Index
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 7c6b38d9114ab..531a808dc94b4 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -43,7 +43,12 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
-from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries
+from pandas.core.dtypes.generic import (
+    ABCIndexClass,
+    ABCMultiIndex,
+    ABCPandasArray,
+    ABCSeries,
+)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.algorithms import checked_add_with_arr
@@ -300,6 +305,39 @@ def _simple_new(
         result._dtype = dtype
         return result
 
+    @classmethod
+    def _from_sequence_strict(cls, scalars, *, dtype=None, copy: bool = False):
+        # GH#37179 eventually _from_sequence should be strict
+
+        scalars, copy = dtl.ensure_arraylike(scalars, copy)
+
+        if scalars.dtype.kind == "M":
+            pass
+        elif scalars.dtype == object:
+            if isinstance(scalars, ABCMultiIndex):
+                raise TypeError("Cannot create a DatetimeArray from MultiIndex")
+
+            inferred = lib.infer_dtype(scalars)
+            if inferred in ["datetime64", "date", "datetime", "empty"]:
+                pass
+            else:
+                msg = f"{inferred} scalars cannot be converted to datetime64[ns]"
+                raise TypeError(msg)
+        elif is_string_dtype(scalars.dtype):
+            # TODO: should go through from_sequence_of_strings?
+            pass
+        elif (
+            is_categorical_dtype(scalars.dtype) and scalars.categories.dtype.kind == "M"
+        ):
+            # TODO: Could also use Categorical[object]
+            # with inferred_type as above?
+            pass
+        else:
+            msg = f"dtype {scalars.dtype} cannot be converted to datetime64[ns]"
+            raise TypeError(msg)
+
+        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
+
     @classmethod
     def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
         return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 035e6e84c6ec8..888bcc93cf497 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -217,6 +217,29 @@ def _simple_new(
         result._dtype = TD64NS_DTYPE
         return result
 
+    @classmethod
+    def _from_sequence_strict(
+        cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False
+    ) -> "TimedeltaArray":
+        # GH#37179 eventually we want _from_sequence to be strict
+        if dtype:
+            _validate_td64_dtype(dtype)
+
+        data, copy = dtl.ensure_arraylike(data, copy)
+
+        if data.dtype.kind == "m":
+            pass
+        elif data.dtype == object:
+            inferred = lib.infer_dtype(data)
+            if inferred in ["timedelta64", "timedelta", "empty"]:
+                pass
+            else:
+                raise ValueError(inferred)
+        else:
+            raise TypeError(data.dtype)
+
+        return cls._from_sequence(data=data, copy=copy)
+
     @classmethod
     def _from_sequence(
         cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 465ec821400e7..47f79ab6d60fb 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1045,7 +1045,10 @@ def astype_nansafe(
     """
     # dispatch on extension dtype if needed
     if is_extension_array_dtype(dtype):
-        return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
+        cls = dtype.construct_array_type()
+        if lib.infer_dtype(arr) == "string":
+            return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy)
+        return cls._from_sequence(arr, dtype=dtype, copy=copy)
 
     if not isinstance(dtype, np.dtype):
         dtype = pandas_dtype(dtype)
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 1d8ee9cf2b73b..fb1a33409bd18 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -16,6 +16,16 @@
 
 
 class TestDatetimeArrayConstructor:
+    def test_from_sequence_strict_invalid_type(self):
+        mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
+        with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
+            DatetimeArray._from_sequence_strict(mi)
+
+        msg = "mixed scalars cannot be converted to datetime64"
+        with pytest.raises(TypeError, match=msg):
+            # GH#37179
+            DatetimeArray._from_sequence_strict(mi._values)
+
     def test_from_sequence_invalid_type(self):
         mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
         with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index c0567209ff91b..06d2a50dde581 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -61,6 +61,16 @@ def test_copy(self):
         assert arr._data is not data
         assert arr._data.base is not data
 
+    def test_from_sequence_strict_invalid_dtypes(self):
+        # GH#37179
+        data = np.arange(5, dtype=np.float64)
+        with pytest.raises(TypeError, match="float64"):
+            TimedeltaArray._from_sequence_strict(data)
+
+        with pytest.raises(ValueError, match="floating"):
+            # object-dtype array of floats
+            TimedeltaArray._from_sequence_strict(data.astype(object))
+
 
 class TestTimedeltaArray:
     # TODO: de-duplicate with test_npsum below
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index e3cdeb9c1951f..45e58a16947ca 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -172,7 +172,8 @@ def astype(self, dtype, copy=True):
             return self
         elif isinstance(dtype, StringDtype):
             value = self.astype(str)  # numpy doesn'y like nested dicts
-            return dtype.construct_array_type()._from_sequence(value, copy=False)
+            cls = dtype.construct_array_type()
+            return cls._from_sequence_of_strings(value, copy=False)
 
         return np.array([dict(x) for x in self], dtype=dtype, copy=copy)
 