From c1a8ad6c2ebd2e913347c0d02021b888cd3390e0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 15 Mar 2020 10:32:17 -0700 Subject: [PATCH 1/4] annotations --- pandas/core/arrays/base.py | 18 ++++++------- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/period.py | 42 ++++++++++++------------------ pandas/core/base.py | 5 ++-- pandas/core/ops/dispatch.py | 10 +++---- pandas/io/pytables.py | 5 ++-- 6 files changed, 37 insertions(+), 45 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6aa303dd04703..1991c8e8cff01 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -21,7 +21,7 @@ from pandas.core.dtypes.common import is_array_like, is_list_like from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import ops @@ -590,7 +590,7 @@ def dropna(self): """ return self[~self.isna()] - def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: + def shift(self, periods: int = 1, fill_value: object = None) -> "ExtensionArray": """ Shift values by desired number. @@ -727,7 +727,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ return self.astype(object), np.nan - def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ABCExtensionArray]: + def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray"]: """ Encode the extension array as an enumerated type. @@ -832,7 +832,7 @@ def repeat(self, repeats, axis=None): def take( self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None - ) -> ABCExtensionArray: + ) -> "ExtensionArray": """ Take elements from an array. @@ -921,7 +921,7 @@ def take(self, indices, allow_fill=False, fill_value=None): # pandas.api.extensions.take raise AbstractMethodError(self) - def copy(self) -> ABCExtensionArray: + def copy(self) -> "ExtensionArray": """ Return a copy of the array. @@ -931,7 +931,7 @@ def copy(self) -> ABCExtensionArray: """ raise AbstractMethodError(self) - def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: + def view(self, dtype=None) -> ArrayLike: """ Return a view on the array. @@ -1001,7 +1001,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: # Reshaping # ------------------------------------------------------------------------ - def ravel(self, order="C") -> ABCExtensionArray: + def ravel(self, order="C") -> "ExtensionArray": """ Return a flattened view on this array. @@ -1022,8 +1022,8 @@ def ravel(self, order="C") -> ABCExtensionArray: @classmethod def _concat_same_type( - cls, to_concat: Sequence[ABCExtensionArray] - ) -> ABCExtensionArray: + cls, to_concat: Sequence["ExtensionArray"] + ) -> "ExtensionArray": """ Concatenate multiple array. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 105d9581b1a25..5bcefcfbc2d19 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -130,7 +130,7 @@ class AttributesMixin: _data: np.ndarray @classmethod - def _simple_new(cls, values, **kwargs): + def _simple_new(cls, values: np.ndarray, **kwargs): raise AbstractMethodError(cls) @property diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5eeee644b3854..680b37c955278 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -31,13 +31,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ( - ABCIndexClass, - ABCPeriod, - ABCPeriodArray, - ABCPeriodIndex, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algos @@ -48,7 +42,7 @@ from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick -def _field_accessor(name, alias, docstring=None): +def _field_accessor(name: str, alias: int, docstring=None): def f(self): base, mult = libfrequencies.get_freq_code(self.freq) result = get_period_field_arr(alias, self.asi8, base) @@ -170,7 +164,7 @@ def __init__(self, values, freq=None, dtype=None, copy=False): self._dtype = PeriodDtype(freq) @classmethod - def _simple_new(cls, values: np.ndarray, freq=None, **kwargs): + def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray": # alias for PeriodArray.__init__ assert isinstance(values, np.ndarray) and values.dtype == "i8" return cls(values, freq=freq, **kwargs) @@ -181,7 +175,7 @@ def _from_sequence( scalars: Sequence[Optional[Period]], dtype: Optional[PeriodDtype] = None, copy: bool = False, - ) -> ABCPeriodArray: + ) -> "PeriodArray": if dtype: freq = dtype.freq else: @@ -202,11 +196,13 @@ def _from_sequence( return cls(ordinals, freq=freq) @classmethod - def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + def _from_sequence_of_strings( + cls, strings, dtype=None, copy=False + ) -> "PeriodArray": return cls._from_sequence(strings, dtype, copy) @classmethod - def _from_datetime64(cls, data, freq, tz=None): + def _from_datetime64(cls, data, freq, tz=None) -> "PeriodArray": """ Construct a PeriodArray from a datetime64 array @@ -270,12 +266,12 @@ def _check_compatible_with(self, other, setitem: bool = False): # Data / Attributes @cache_readonly - def dtype(self): + def dtype(self) -> PeriodDtype: return self._dtype # error: Read-only property cannot override read-write property [misc] @property # type: ignore - def freq(self): + def freq(self) -> DateOffset: """ Return the frequency object for this PeriodArray. """ @@ -402,7 +398,7 @@ def __arrow_array__(self, type=None): daysinmonth = days_in_month @property - def is_leap_year(self): + def is_leap_year(self) -> np.ndarray: """ Logical indicating if the date belongs to a leap year. """ @@ -458,12 +454,6 @@ def to_timestamp(self, freq=None, how="start"): new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) return DatetimeArray._from_sequence(new_data, freq="infer") - # -------------------------------------------------------------------- - # Array-like / EA-Interface Methods - - def _values_for_argsort(self): - return self._data - # -------------------------------------------------------------------- def _time_shift(self, periods, freq=None): @@ -495,7 +485,7 @@ def _time_shift(self, periods, freq=None): def _box_func(self): return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) - def asfreq(self, freq=None, how="E"): + def asfreq(self, freq=None, how="E") -> "PeriodArray": """ Convert the Period Array/Index to the specified frequency `freq`. @@ -557,7 +547,7 @@ def asfreq(self, freq=None, how="E"): # ------------------------------------------------------------------ # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): if boxed: return str return "'{}'".format @@ -584,7 +574,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): # ------------------------------------------------------------------ - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # We handle Period[T] -> Period[U] # Our parent handles everything else. dtype = pandas_dtype(dtype) @@ -965,8 +955,8 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): if end is not None: end = Period(end, freq) - is_start_per = isinstance(start, ABCPeriod) - is_end_per = isinstance(end, ABCPeriod) + is_start_per = isinstance(start, Period) + is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: raise ValueError("start and end must have same freq") diff --git a/pandas/core/base.py b/pandas/core/base.py index 40ff0640a5bc4..764a6fdfd4954 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins import textwrap -from typing import Dict, FrozenSet, List, Optional, Union +from typing import Dict, FrozenSet, List, Optional import numpy as np @@ -34,6 +34,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops +from pandas.typing import ArrayLike _shared_docs: Dict[str, str] = dict() _indexops_doc_kwargs = dict( @@ -598,7 +599,7 @@ class IndexOpsMixin: ) @property - def _values(self) -> Union[ExtensionArray, np.ndarray]: + def _values(self) -> ArrayLike: # must be defined here as a property for mypy raise AbstractMethodError(self) diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index 61a3032c7a02c..5c34cb20be266 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -1,10 +1,12 @@ """ Functions for defining unary operations. """ -from typing import Any, Union +from typing import Any import numpy as np +from pandas._typing import ArrayLike + from pandas.core.dtypes.common import ( is_datetime64_dtype, is_extension_array_dtype, @@ -13,7 +15,7 @@ is_scalar, is_timedelta64_dtype, ) -from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries +from pandas.core.dtypes.generic import ABCSeries from pandas.core.construction import array @@ -93,9 +95,7 @@ def should_series_dispatch(left, right, op): return False -def dispatch_to_extension_op( - op, left: Union[ABCExtensionArray, np.ndarray], right: Any, -): +def dispatch_to_extension_op(op, left: ArrayLike, right: Any): """ Assume that left or right is a Series backed by an ExtensionArray, apply the operator defined by op. diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 7aeed5c316d7f..a64f7db87cc07 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2202,7 +2202,7 @@ def __eq__(self, other: Any) -> bool: for a in ["name", "cname", "dtype", "pos"] ) - def set_data(self, data: Union[np.ndarray, ABCExtensionArray]): + def set_data(self, data: ArrayLike): assert data is not None assert self.dtype is None @@ -4959,11 +4959,12 @@ def _dtype_to_kind(dtype_str: str) -> str: return kind -def _get_data_and_dtype_name(data: Union[np.ndarray, ABCExtensionArray]): +def _get_data_and_dtype_name(data: ArrayLike): """ Convert the passed data into a storable form and a dtype string. """ if is_categorical_dtype(data.dtype): + assert isinstance(data, Categorical) # for mypy data = data.codes # For datetime64tz we need to drop the TZ in tests TODO: why? From 9bd7fa193b9bd272147df5e3594c9704e7243936 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 15 Mar 2020 10:47:32 -0700 Subject: [PATCH 2/4] revert --- pandas/core/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 764a6fdfd4954..40ff0640a5bc4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins import textwrap -from typing import Dict, FrozenSet, List, Optional +from typing import Dict, FrozenSet, List, Optional, Union import numpy as np @@ -34,7 +34,6 @@ from pandas.core.arrays import ExtensionArray from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops -from pandas.typing import ArrayLike _shared_docs: Dict[str, str] = dict() _indexops_doc_kwargs = dict( @@ -599,7 +598,7 @@ class IndexOpsMixin: ) @property - def _values(self) -> ArrayLike: + def _values(self) -> Union[ExtensionArray, np.ndarray]: # must be defined here as a property for mypy raise AbstractMethodError(self) From cf97e3e22d7f82747b6f7be541b62441b555e012 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Mar 2020 09:00:34 -0700 Subject: [PATCH 3/4] update docstring --- pandas/core/arrays/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1991c8e8cff01..9aeaa827fe8b0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -942,8 +942,8 @@ def view(self, dtype=None) -> ArrayLike: Returns ------- - ExtensionArray - A view of the :class:`ExtensionArray`. + ExtensionArray or np.ndarray + A view on the :class:`ExtensionArray`'s data. """ # NB: # - This must return a *new* object referencing the same data, not self. From 1ef8585047db0e1465e0510d00d056cc6c48a559 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Mar 2020 18:12:52 -0700 Subject: [PATCH 4/4] update check --- pandas/io/pytables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a64f7db87cc07..544d45999c14b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4963,8 +4963,7 @@ def _get_data_and_dtype_name(data: ArrayLike): """ Convert the passed data into a storable form and a dtype string. """ - if is_categorical_dtype(data.dtype): - assert isinstance(data, Categorical) # for mypy + if isinstance(data, Categorical): data = data.codes # For datetime64tz we need to drop the TZ in tests TODO: why?