diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 33b2f65340a3b..c3b4ce8a9f7d4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1938,7 +1938,7 @@ def __init__( freq = values.freq elif freq and values.freq: freq = to_offset(freq) - freq, _ = validate_inferred_freq(freq, values.freq, False) + freq = _validate_inferred_freq(freq, values.freq) if dtype is not None and dtype != values.dtype: # TODO: we only have tests for this for DTA, not TDA (2022-07-01) @@ -2025,6 +2025,39 @@ def freq(self, value) -> None: self._freq = value + @final + def _maybe_pin_freq(self, freq, validate_kwds: dict): + """ + Constructor helper to pin the appropriate `freq` attribute. Assumes + that self._freq is currently set to any freq inferred in + _from_sequence_not_strict. + """ + if freq is None: + # user explicitly passed None -> override any inferred_freq + self._freq = None + elif freq == "infer": + # if self._freq is *not* None then we already inferred a freq + # and there is nothing left to do + if self._freq is None: + # Set _freq directly to bypass duplicative _validate_frequency + # check. + self._freq = to_offset(self.inferred_freq) + elif freq is lib.no_default: + # user did not specify anything, keep inferred freq if the original + # data had one, otherwise do nothing + pass + elif self._freq is None: + # We cannot inherit a freq from the data, so we need to validate + # the user-passed freq + freq = to_offset(freq) + type(self)._validate_frequency(self, freq, **validate_kwds) + self._freq = freq + else: + # Otherwise we just need to check that the user-passed freq + # doesn't conflict with the one we already have. + freq = to_offset(freq) + _validate_inferred_freq(freq, self._freq) + @final @classmethod def _validate_frequency(cls, index, freq: BaseOffset, **kwargs): @@ -2353,7 +2386,9 @@ def _is_dates_only(self) -> bool: # Shared Constructor Helpers -def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str): +def ensure_arraylike_for_datetimelike( + data, copy: bool, cls_name: str +) -> tuple[ArrayLike, bool]: if not hasattr(data, "dtype"): # e.g. list, tuple if not isinstance(data, (list, tuple)) and np.ndim(data) == 0: @@ -2426,9 +2461,9 @@ def validate_periods(periods: int | float | None) -> int | None: return periods -def validate_inferred_freq( - freq, inferred_freq, freq_infer -) -> tuple[BaseOffset | None, bool]: +def _validate_inferred_freq( + freq: BaseOffset | None, inferred_freq: BaseOffset | None +) -> BaseOffset | None: """ If the user passes a freq and another freq is inferred from passed data, require that they match. @@ -2437,12 +2472,10 @@ def validate_inferred_freq( ---------- freq : DateOffset or None inferred_freq : DateOffset or None - freq_infer : bool Returns ------- freq : DateOffset or None - freq_infer : bool Notes ----- @@ -2458,12 +2491,11 @@ def validate_inferred_freq( ) if freq is None: freq = inferred_freq - freq_infer = False - return freq, freq_infer + return freq -def maybe_infer_freq(freq): +def maybe_infer_freq(freq) -> tuple[BaseOffset | None, bool]: """ Comparing a DateOffset to the string "infer" raises, so we need to be careful about comparisons. Make a dummy variable `freq_infer` to diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6efff2483e1ae..b100a6a80f8ed 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -76,6 +76,7 @@ from collections.abc import Iterator from pandas._typing import ( + ArrayLike, DateTimeErrorChoices, DtypeObj, IntervalClosedType, @@ -327,13 +328,10 @@ def _from_sequence_not_strict( dayfirst: bool = False, yearfirst: bool = False, ambiguous: TimeAmbiguous = "raise", - ): + ) -> Self: """ A non-strict version of _from_sequence, called from DatetimeIndex.__new__. """ - explicit_none = freq is None - freq = freq if freq is not lib.no_default else None - freq, freq_infer = dtl.maybe_infer_freq(freq) # if the user either explicitly passes tz=None or a tz-naive dtype, we # disallows inferring a tz. @@ -349,13 +347,16 @@ def _from_sequence_not_strict( unit = None if dtype is not None: - if isinstance(dtype, np.dtype): - unit = np.datetime_data(dtype)[0] - else: - # DatetimeTZDtype - unit = dtype.unit + unit = dtl.dtype_to_unit(dtype) + + data, copy = dtl.ensure_arraylike_for_datetimelike( + data, copy, cls_name="DatetimeArray" + ) + inferred_freq = None + if isinstance(data, DatetimeArray): + inferred_freq = data.freq - subarr, tz, inferred_freq = _sequence_to_dt64( + subarr, tz = _sequence_to_dt64( data, copy=copy, tz=tz, @@ -372,26 +373,15 @@ def _from_sequence_not_strict( "Use obj.tz_localize(None) instead." ) - freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) - if explicit_none: - freq = None - data_unit = np.datetime_data(subarr.dtype)[0] data_dtype = tz_to_dtype(tz, data_unit) - result = cls._simple_new(subarr, freq=freq, dtype=data_dtype) + result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype) if unit is not None and unit != result.unit: # If unit was specified in user-passed dtype, cast to it here result = result.as_unit(unit) - if inferred_freq is None and freq is not None: - # this condition precludes `freq_infer` - cls._validate_frequency(result, freq, ambiguous=ambiguous) - - elif freq_infer: - # Set _freq directly to bypass duplicative _validate_frequency - # check. - result._freq = to_offset(result.inferred_freq) - + validate_kwds = {"ambiguous": ambiguous} + result._maybe_pin_freq(freq, validate_kwds) return result # error: Signature of "_generate_range" incompatible with supertype @@ -2180,7 +2170,7 @@ def std( def _sequence_to_dt64( - data, + data: ArrayLike, *, copy: bool = False, tz: tzinfo | None = None, @@ -2192,7 +2182,8 @@ def _sequence_to_dt64( """ Parameters ---------- - data : list-like + data : np.ndarray or ExtensionArray + dtl.ensure_arraylike_for_datetimelike has already been called. copy : bool, default False tz : tzinfo or None, default None dayfirst : bool, default False @@ -2209,21 +2200,11 @@ def _sequence_to_dt64( Where `unit` is "ns" unless specified otherwise by `out_unit`. tz : tzinfo or None Either the user-provided tzinfo or one inferred from the data. - inferred_freq : Tick or None - The inferred frequency of the sequence. Raises ------ TypeError : PeriodDType data is passed """ - inferred_freq = None - - data, copy = dtl.ensure_arraylike_for_datetimelike( - data, copy, cls_name="DatetimeArray" - ) - - if isinstance(data, DatetimeArray): - inferred_freq = data.freq # By this point we are assured to have either a numpy array or Index data, copy = maybe_convert_dtype(data, copy, tz=tz) @@ -2236,6 +2217,7 @@ def _sequence_to_dt64( if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, # also complex or categorical or other extension + data = cast(np.ndarray, data) copy = False if lib.infer_dtype(data, skipna=False) == "integer": data = data.astype(np.int64) @@ -2248,7 +2230,7 @@ def _sequence_to_dt64( yearfirst=yearfirst, creso=abbrev_to_npy_unit(out_unit), ) - return result, tz, None + return result, tz else: converted, inferred_tz = objects_to_datetime64( data, @@ -2273,7 +2255,7 @@ def _sequence_to_dt64( result, _ = _construct_from_dt64_naive( converted, tz=tz, copy=copy, ambiguous=ambiguous ) - return result, tz, None + return result, tz data_dtype = data.dtype @@ -2281,6 +2263,7 @@ def _sequence_to_dt64( # so we need to handle these types. if isinstance(data_dtype, DatetimeTZDtype): # DatetimeArray -> ndarray + data = cast(DatetimeArray, data) tz = _maybe_infer_tz(tz, data.tz) result = data._ndarray @@ -2289,6 +2272,7 @@ def _sequence_to_dt64( if isinstance(data, DatetimeArray): data = data._ndarray + data = cast(np.ndarray, data) result, copy = _construct_from_dt64_naive( data, tz=tz, copy=copy, ambiguous=ambiguous ) @@ -2299,6 +2283,7 @@ def _sequence_to_dt64( if data.dtype != INT64_DTYPE: data = data.astype(np.int64, copy=False) copy = False + data = cast(np.ndarray, data) result = data.view(out_dtype) if copy: @@ -2308,7 +2293,7 @@ def _sequence_to_dt64( assert result.dtype.kind == "M" assert result.dtype != "M8" assert is_supported_unit(get_unit_from_dtype(result.dtype)) - return result, tz, inferred_freq + return result, tz def _construct_from_dt64_naive( diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b64ab1154ef96..226e2568fdbf8 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -26,7 +26,6 @@ is_supported_unit, npy_unit_to_abbrev, periods_per_second, - to_offset, ) from pandas._libs.tslibs.conversion import precision_from_unit from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit @@ -236,9 +235,7 @@ def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self: if dtype: dtype = _validate_td64_dtype(dtype) - data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) - freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) - freq = cast("Tick | None", freq) + data, freq = sequence_to_td64ns(data, copy=copy, unit=None) if dtype is not None: data = astype_overflowsafe(data, dtype=dtype, copy=False) @@ -256,38 +253,22 @@ def _from_sequence_not_strict( unit=None, ) -> Self: """ - A non-strict version of _from_sequence, called from TimedeltaIndex.__new__. + _from_sequence_not_strict but without responsibility for finding the + result's `freq`. """ if dtype: dtype = _validate_td64_dtype(dtype) assert unit not in ["Y", "y", "M"] # caller is responsible for checking - explicit_none = freq is None - freq = freq if freq is not lib.no_default else None - - freq, freq_infer = dtl.maybe_infer_freq(freq) - data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) - freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) - freq = cast("Tick | None", freq) - if explicit_none: - freq = None if dtype is not None: data = astype_overflowsafe(data, dtype=dtype, copy=False) - result = cls._simple_new(data, dtype=data.dtype, freq=freq) - - if inferred_freq is None and freq is not None: - # this condition precludes `freq_infer` - cls._validate_frequency(result, freq) - - elif freq_infer: - # Set _freq directly to bypass duplicative _validate_frequency - # check. - result._freq = to_offset(result.inferred_freq) + result = cls._simple_new(data, dtype=data.dtype, freq=inferred_freq) + result._maybe_pin_freq(freq, {}) return result # Signature of "_generate_range" incompatible with supertype