From fa6042becc9471e317b49b8a92b3dd0506986e4c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 6 Oct 2020 20:54:55 -0700 Subject: [PATCH 1/3] TYP: refactor datetimelike.py to satisfy typing --- pandas/core/arrays/datetimelike.py | 459 +++++++++++++++-------------- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 4 +- pandas/core/arrays/timedeltas.py | 6 +- setup.cfg | 3 - 5 files changed, 248 insertions(+), 226 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a1d6a2c8f4672..2943354847a3f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,17 @@ from datetime import datetime, timedelta import operator -from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + cast, +) import warnings import numpy as np @@ -61,6 +72,9 @@ from pandas.tseries import frequencies +if TYPE_CHECKING: + from pandas.core.arrays import DatetimeArray, TimedeltaArray + DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType] @@ -116,6 +130,9 @@ def wrapper(self, other): class AttributesMixin: _data: np.ndarray + def __init__(self, data, dtype=None, freq=None, copy=False): + raise AbstractMethodError(self) + @classmethod def _simple_new( cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=None @@ -209,220 +226,6 @@ def _check_compatible_with( raise AbstractMethodError(self) -class DatelikeOps: - """ - Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. - """ - - @Substitution( - URL="https://docs.python.org/3/library/datetime.html" - "#strftime-and-strptime-behavior" - ) - def strftime(self, date_format): - """ - Convert to Index using specified date_format. - - Return an Index of formatted strings specified by date_format, which - supports the same string format as the python standard library. Details - of the string format can be found in `python string format - doc <%(URL)s>`__. - - Parameters - ---------- - date_format : str - Date format string (e.g. "%%Y-%%m-%%d"). - - Returns - ------- - ndarray - NumPy ndarray of formatted strings. - - See Also - -------- - to_datetime : Convert the given argument to datetime. - DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. - DatetimeIndex.round : Round the DatetimeIndex to the specified freq. - DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. - - Examples - -------- - >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), - ... periods=3, freq='s') - >>> rng.strftime('%%B %%d, %%Y, %%r') - Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', - 'March 10, 2018, 09:00:02 AM'], - dtype='object') - """ - result = self._format_native_types(date_format=date_format, na_rep=np.nan) - return result.astype(object) - - -class TimelikeOps: - """ - Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. - """ - - _round_doc = """ - Perform {op} operation on the data to the specified `freq`. - - Parameters - ---------- - freq : str or Offset - The frequency level to {op} the index to. Must be a fixed - frequency like 'S' (second) not 'ME' (month end). See - :ref:`frequency aliases ` for - a list of possible `freq` values. - ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' - Only relevant for DatetimeIndex: - - - 'infer' will attempt to infer fall dst-transition hours based on - order - - bool-ndarray where True signifies a DST time, False designates - a non-DST time (note that this flag is only applicable for - ambiguous times) - - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous - times. - - .. versionadded:: 0.24.0 - - nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ -default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times. - - .. versionadded:: 0.24.0 - - Returns - ------- - DatetimeIndex, TimedeltaIndex, or Series - Index of the same type for a DatetimeIndex or TimedeltaIndex, - or a Series with the same index for a Series. - - Raises - ------ - ValueError if the `freq` cannot be converted. - - Examples - -------- - **DatetimeIndex** - - >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') - >>> rng - DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', - '2018-01-01 12:01:00'], - dtype='datetime64[ns]', freq='T') - """ - - _round_example = """>>> rng.round('H') - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.round("H") - 0 2018-01-01 12:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 12:00:00 - dtype: datetime64[ns] - """ - - _floor_example = """>>> rng.floor('H') - DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.floor("H") - 0 2018-01-01 11:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 12:00:00 - dtype: datetime64[ns] - """ - - _ceil_example = """>>> rng.ceil('H') - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 13:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.ceil("H") - 0 2018-01-01 12:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 13:00:00 - dtype: datetime64[ns] - """ - - def _round(self, freq, mode, ambiguous, nonexistent): - # round the local times - if is_datetime64tz_dtype(self.dtype): - # operate on naive timestamps, then convert back to aware - naive = self.tz_localize(None) - result = naive._round(freq, mode, ambiguous, nonexistent) - aware = result.tz_localize( - self.tz, ambiguous=ambiguous, nonexistent=nonexistent - ) - return aware - - values = self.view("i8") - result = round_nsint64(values, mode, freq) - result = self._maybe_mask_results(result, fill_value=NaT) - return self._simple_new(result, dtype=self.dtype) - - @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, ambiguous="raise", nonexistent="raise"): - return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) - - @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq, ambiguous="raise", nonexistent="raise"): - return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) - - @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq, ambiguous="raise", nonexistent="raise"): - return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) - - def _with_freq(self, freq): - """ - Helper to get a view on the same data, with a new freq. - - Parameters - ---------- - freq : DateOffset, None, or "infer" - - Returns - ------- - Same type as self - """ - # GH#29843 - if freq is None: - # Always valid - pass - elif len(self) == 0 and isinstance(freq, BaseOffset): - # Always valid. In the TimedeltaArray case, we assume this - # is a Tick offset. - pass - else: - # As an internal method, we can ensure this assertion always holds - assert freq == "infer" - freq = to_offset(self.inferred_freq) - - arr = self.view() - arr._freq = freq - return arr - - DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") @@ -748,7 +551,7 @@ def _validate_shift_value(self, fill_value): # only warn if we're not going to raise if self._scalar_type is Period and lib.is_integer(fill_value): # kludge for #31971 since Period(integer) tries to cast to str - new_fill = Period._from_ordinal(fill_value, freq=self.dtype.freq) + new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: new_fill = self._scalar_type(fill_value) @@ -1076,6 +879,10 @@ def _validate_frequency(cls, index, freq, **kwargs): f"does not conform to passed frequency {freq.freqstr}" ) from e + @classmethod + def _generate_range(cls, start, end, periods, freq, *args, **kwargs): + raise AbstractMethodError(cls) + # monotonicity/uniqueness properties are called via frequencies.infer_freq, # see GH#23789 @@ -1432,6 +1239,7 @@ def __rsub__(self, other): # TODO: Can we simplify/generalize these cases at all? raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}") elif is_timedelta64_dtype(self.dtype): + self = cast("TimedeltaArray", self) return (-self) + other # We get here with e.g. datetime objects @@ -1584,6 +1392,225 @@ def median(self, axis: Optional[int] = None, skipna: bool = True, *args, **kwarg DatetimeLikeArrayMixin._add_comparison_ops() + +class DatelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. + """ + + @Substitution( + URL="https://docs.python.org/3/library/datetime.html" + "#strftime-and-strptime-behavior" + ) + def strftime(self, date_format): + """ + Convert to Index using specified date_format. + + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format + doc <%(URL)s>`__. + + Parameters + ---------- + date_format : str + Date format string (e.g. "%%Y-%%m-%%d"). + + Returns + ------- + ndarray + NumPy ndarray of formatted strings. + + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), + ... periods=3, freq='s') + >>> rng.strftime('%%B %%d, %%Y, %%r') + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='object') + """ + result = self._format_native_types(date_format=date_format, na_rep=np.nan) + return result.astype(object) + + +_round_doc = """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + .. versionadded:: 0.24.0 + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. + + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='T') + """ + +_round_example = """>>> rng.round('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.round("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + """ + +_floor_example = """>>> rng.floor('H') + DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.floor("H") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + """ + +_ceil_example = """>>> rng.ceil('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 13:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.ceil("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 13:00:00 + dtype: datetime64[ns] + """ + + +class TimelikeOps(DatetimeLikeArrayMixin): + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ + + def _round(self, freq, mode, ambiguous, nonexistent): + # round the local times + if is_datetime64tz_dtype(self.dtype): + # operate on naive timestamps, then convert back to aware + self = cast("DatetimeArray", self) + naive = self.tz_localize(None) + result = naive._round(freq, mode, ambiguous, nonexistent) + aware = result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + return aware + + values = self.view("i8") + result = round_nsint64(values, mode, freq) + result = self._maybe_mask_results(result, fill_value=NaT) + return self._simple_new(result, dtype=self.dtype) + + @Appender((_round_doc + _round_example).format(op="round")) + def round(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent) + + @Appender((_round_doc + _floor_example).format(op="floor")) + def floor(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + @Appender((_round_doc + _ceil_example).format(op="ceil")) + def ceil(self, freq, ambiguous="raise", nonexistent="raise"): + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + # -------------------------------------------------------------- + # Frequency Methods + + def _with_freq(self, freq): + """ + Helper to get a view on the same data, with a new freq. + + Parameters + ---------- + freq : DateOffset, None, or "infer" + + Returns + ------- + Same type as self + """ + # GH#29843 + if freq is None: + # Always valid + pass + elif len(self) == 0 and isinstance(freq, BaseOffset): + # Always valid. In the TimedeltaArray case, we assume this + # is a Tick offset. + pass + else: + # As an internal method, we can ensure this assertion always holds + assert freq == "infer" + freq = to_offset(self.inferred_freq) + + arr = self.view() + arr._freq = freq + return arr + + # ------------------------------------------------------------------- # Shared Constructor Helpers diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1e879e32bed5f..fb8604a8c87ba 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -113,7 +113,7 @@ def f(self): return property(f) -class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps): +class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ed45b4da7279e..9cda95a6b4663 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -63,7 +63,7 @@ def f(self): return property(f) -class PeriodArray(PeriodMixin, dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): +class PeriodArray(PeriodMixin, dtl.DatelikeOps): """ Pandas ExtensionArray for storing Period data. @@ -148,7 +148,7 @@ class PeriodArray(PeriodMixin, dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, freq=None, dtype=None, copy=False): + def __init__(self, values, dtype=None, freq=None, copy=False): freq = validate_dtype_freq(dtype, freq) if freq is not None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f85e3e716bbf9..5dd76df4168f7 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -62,7 +62,7 @@ def f(self) -> np.ndarray: return property(f) -class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): +class TimedeltaArray(dtl.TimelikeOps): """ Pandas ExtensionArray for timedelta data. @@ -261,9 +261,7 @@ def _from_sequence_not_strict( return result @classmethod - def _generate_range( - cls, start, end, periods, freq, closed=None - ) -> "TimedeltaArray": + def _generate_range(cls, start, end, periods, freq, closed=None): periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): diff --git a/setup.cfg b/setup.cfg index 8ec10e7db5a5c..afc07e898e1b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -139,9 +139,6 @@ check_untyped_defs=False [mypy-pandas.core.apply] check_untyped_defs=False -[mypy-pandas.core.arrays.datetimelike] -check_untyped_defs=False - [mypy-pandas.core.arrays.sparse.array] check_untyped_defs=False From efd8e0b5a124874bcf288cdd2a903aa83145b1fc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Oct 2020 16:04:48 -0700 Subject: [PATCH 2/3] test fixup --- pandas/core/arrays/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9cda95a6b4663..bf2b3a0a1c9ba 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -148,7 +148,7 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype=None, freq=None, copy=False): + def __init__(self, values, freq=None, dtype=None, copy=False): freq = validate_dtype_freq(dtype, freq) if freq is not None: From da0da1472fc1d3f8b1512d60f24562b9ad0124fc Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Oct 2020 14:30:36 -0700 Subject: [PATCH 3/3] revert setup.cfg edit --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 88b85f8945587..8d3d79789a252 100644 --- a/setup.cfg +++ b/setup.cfg @@ -139,6 +139,9 @@ check_untyped_defs=False [mypy-pandas.core.apply] check_untyped_defs=False +[mypy-pandas.core.arrays.datetimelike] +check_untyped_defs=False + [mypy-pandas.core.arrays.sparse.array] check_untyped_defs=False