From 55b3efd781e5a0cdb87c08a6bca41a378886edd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 1 Aug 2022 23:50:28 -0400 Subject: [PATCH 1/4] TYP: pandas.core.series annotations from pandas-stubs --- pandas/_typing.py | 4 +- pandas/core/frame.py | 114 +++----------- pandas/core/generic.py | 154 +++++++++++++------ pandas/core/series.py | 341 +++++++++++++++++++---------------------- 4 files changed, 281 insertions(+), 332 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 48fd8f2d1256d..88d826ec454b2 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -39,6 +39,7 @@ Timedelta, Timestamp, ) + from pandas._libs.tslibs import BaseOffset from pandas.core.dtypes.dtypes import ExtensionDtype @@ -63,7 +64,6 @@ from pandas.core.window.rolling import BaseWindow from pandas.io.formats.format import EngFormatter - from pandas.tseries.offsets import DateOffset # numpy compatible types NumpyValueArrayLike = Union[npt._ScalarLike_co, npt.ArrayLike] @@ -113,7 +113,7 @@ Suffixes = Tuple[Optional[str], Optional[str]] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] -Frequency = Union[str, "DateOffset"] +Frequency = Union[str, "BaseOffset"] Axes = Union[AnyArrayLike, List, range] RandomState = Union[ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49e5bc24786dd..db1fab1ea6d2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -25,6 +25,7 @@ Iterable, Iterator, Literal, + Mapping, Sequence, cast, overload, @@ -4982,12 +4983,12 @@ def _reindex_multi( def align( self, other, - join: str = "outer", + join: Literal["outer", "inner", "left", "right"] = "outer", axis: Axis | None = None, level: Level | None = None, copy: bool = True, fill_value=None, - method: str | None = None, + method: FillnaOptions | None = None, limit=None, fill_axis: Axis = 0, broadcast_axis: Axis | None = None, @@ -5465,128 +5466,53 @@ def rename( @overload def fillna( self, - value=..., + value: Hashable | Mapping | Series | DataFrame = ..., + *, method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: Literal[False] = ..., - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> DataFrame: ... @overload def fillna( self, - value, - method: FillnaOptions | None, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, + value: Hashable | Mapping | Series | DataFrame = ..., *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value, - *, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - method: FillnaOptions | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - method: FillnaOptions | None, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value, - *, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> None: ... @overload def fillna( self, - value, - method: FillnaOptions | None, + value: Hashable | Mapping | Series | DataFrame = ..., *, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value=..., method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: bool = ..., - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> DataFrame | None: ... + # error: Signature of "fillna" incompatible with supertype "NDFrame" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) @doc(NDFrame.fillna, **_shared_doc_kwargs) - def fillna( + def fillna( # type: ignore[override] self, - value: object | ArrayLike | None = None, + value: Hashable | Mapping | Series | DataFrame = None, method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool = False, - limit=None, - downcast=None, + limit: int | None = None, + downcast: dict | None = None, ) -> DataFrame | None: return super().fillna( value=value, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index caad4b45216ed..17be8f693f3d0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -48,8 +48,10 @@ DtypeArg, DtypeObj, FilePath, + FillnaOptions, FloatFormatType, FormattersType, + Frequency, IgnoreRaise, IndexKeyFunc, IndexLabel, @@ -6569,15 +6571,54 @@ def convert_dtypes( # ---------------------------------------------------------------------- # Filling NA's + @overload + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[False] = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> NDFrameT: + ... + + @overload + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: Literal[True], + limit: int | None = ..., + downcast: dict | None = ..., + ) -> None: + ... + + @overload + def fillna( + self: NDFrameT, + value: Hashable | Mapping | Series | DataFrame = ..., + *, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., + inplace: bool_t = ..., + limit: int | None = ..., + downcast: dict | None = ..., + ) -> NDFrameT | None: + ... + @doc(**_shared_doc_kwargs) def fillna( self: NDFrameT, - value=None, - method=None, - axis=None, + value: Hashable | Mapping | Series | DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, inplace: bool_t = False, - limit=None, - downcast=None, + limit: int | None = None, + downcast: dict | None = None, ) -> NDFrameT | None: """ Fill NA/NaN values using the specified method. @@ -6754,7 +6795,13 @@ def fillna( for k, v in value.items(): if k not in result: continue - downcast_k = downcast if not is_dict else downcast.get(k) + # error: Item "None" of "Optional[Dict[Any, Any]]" has no + # attribute "get" + downcast_k = ( + downcast + if not is_dict + else downcast.get(k) # type: ignore[union-attr] + ) result.loc[:, k] = result[k].fillna( v, limit=limit, downcast=downcast_k ) @@ -6765,7 +6812,10 @@ def fillna( result = self.T.fillna(value=value, limit=limit).T - new_data = result + # error: Incompatible types in assignment (expression has type + # "NDFrameT", variable has type "Union[ArrayManager, + # SingleArrayManager, BlockManager, SingleBlockManager]") + new_data = result # type: ignore[assignment] else: new_data = self._mgr.fillna( @@ -6790,7 +6840,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> NDFrameT: ... @@ -6801,7 +6851,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> None: ... @@ -6812,7 +6862,7 @@ def ffill( axis: None | Axis = ..., inplace: bool_t = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> NDFrameT | None: ... @@ -6823,7 +6873,7 @@ def ffill( axis: None | Axis = None, inplace: bool_t = False, limit: None | int = None, - downcast=None, + downcast: dict | None = None, ) -> NDFrameT | None: """ Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. @@ -6846,7 +6896,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> NDFrameT: ... @@ -6857,7 +6907,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> None: ... @@ -6868,7 +6918,7 @@ def bfill( axis: None | Axis = ..., inplace: bool_t = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> NDFrameT | None: ... @@ -6879,7 +6929,7 @@ def bfill( axis: None | Axis = None, inplace: bool_t = False, limit: None | int = None, - downcast=None, + downcast: dict | None = None, ) -> NDFrameT | None: """ Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. @@ -7968,11 +8018,11 @@ def clip( @doc(**_shared_doc_kwargs) def asfreq( self: NDFrameT, - freq, - method=None, + freq: Frequency, + method: FillnaOptions | None = None, how: str | None = None, normalize: bool_t = False, - fill_value=None, + fill_value: Hashable = None, ) -> NDFrameT: """ Convert time series to specified frequency. @@ -8282,15 +8332,15 @@ def between_time( def resample( self, rule, - axis=0, + axis: Axis = 0, closed: str | None = None, label: str | None = None, convention: str = "start", kind: str | None = None, loffset=None, base: int | None = None, - on=None, - level=None, + on: Level = None, + level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, group_keys: bool_t | lib.NoDefault = lib.no_default, @@ -9121,18 +9171,18 @@ def compare( @doc(**_shared_doc_kwargs) def align( - self, - other, - join="outer", - axis=None, - level=None, - copy=True, - fill_value=None, - method=None, - limit=None, - fill_axis=0, - broadcast_axis=None, - ): + self: NDFrameT, + other: NDFrameT, + join: Literal["outer", "inner", "left", "right"] = "outer", + axis: Axis | None = None, + level: Level = None, + copy: bool_t = True, + fill_value: Hashable = None, + method: FillnaOptions | None = None, + limit: int | None = None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> NDFrameT: """ Align two objects on their axes with the specified join method. @@ -9599,8 +9649,8 @@ def where( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> NDFrameT: @@ -9613,8 +9663,8 @@ def where( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> None: @@ -9627,8 +9677,8 @@ def where( other=..., *, inplace: bool_t = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: @@ -9650,8 +9700,8 @@ def where( cond, other=np.nan, inplace: bool_t = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = "raise", try_cast: bool_t | lib.NoDefault = lib.no_default, ) -> NDFrameT | None: @@ -9820,8 +9870,8 @@ def mask( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> NDFrameT: @@ -9834,8 +9884,8 @@ def mask( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> None: @@ -9848,8 +9898,8 @@ def mask( other=..., *, inplace: bool_t = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., try_cast: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: @@ -9872,8 +9922,8 @@ def mask( cond, other=np.nan, inplace: bool_t = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = "raise", try_cast: bool_t | lib.NoDefault = lib.no_default, ) -> NDFrameT | None: @@ -9903,7 +9953,11 @@ def mask( @doc(klass=_shared_doc_kwargs["klass"]) def shift( - self: NDFrameT, periods=1, freq=None, axis=0, fill_value=None + self: NDFrameT, + periods: int = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = None, ) -> NDFrameT: """ Shift index by desired number of periods with an optional time `freq`. diff --git a/pandas/core/series.py b/pandas/core/series.py index 206fcbe05d006..dab90ce7edf40 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -12,6 +12,7 @@ Hashable, Iterable, Literal, + Mapping, Sequence, Union, cast, @@ -40,6 +41,7 @@ DtypeObj, FilePath, FillnaOptions, + Frequency, IgnoreRaise, IndexKeyFunc, IndexLabel, @@ -161,7 +163,6 @@ import pandas.plotting if TYPE_CHECKING: - from pandas._typing import ( NumpySorter, NumpyValueArrayLike, @@ -742,7 +743,7 @@ def array(self) -> ExtensionArray: return self._mgr.array_values() # ops - def ravel(self, order="C"): + def ravel(self, order: str = "C") -> np.ndarray: """ Return the flattened underlying data as an ndarray. @@ -910,7 +911,9 @@ def axes(self) -> list[Index]: # Indexing Methods @Appender(NDFrame.take.__doc__) - def take(self, indices, axis=0, is_copy=None, **kwargs) -> Series: + def take( + self, indices, axis: Axis = 0, is_copy: bool | None = None, **kwargs + ) -> Series: if is_copy is not None: warnings.warn( "is_copy is deprecated and will be removed in a future version. " @@ -1316,7 +1319,7 @@ def _maybe_update_cacher( def _is_mixed_type(self): return False - def repeat(self, repeats, axis=None) -> Series: + def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: """ Repeat elements of a Series. @@ -1815,7 +1818,7 @@ def keys(self) -> Index: """ return self.index - def to_dict(self, into=dict): + def to_dict(self, into: type[dict] = dict) -> dict: """ Convert Series to {label -> value} dict or dict-like object. @@ -2000,8 +2003,8 @@ def _set_name(self, name, inplace=False) -> Series: def groupby( self, by=None, - axis=0, - level=None, + axis: Axis = 0, + level: Level = None, as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = no_default, @@ -2044,8 +2047,7 @@ def groupby( # Statistics, overridden ndarray methods # TODO: integrate bottleneck - - def count(self, level=None): + def count(self, level: Level = None): """ Return number of non-NA/null observations in the Series. @@ -2197,23 +2199,30 @@ def unique(self) -> ArrayLike: return super().unique() @overload - def drop_duplicates(self, keep=..., inplace: Literal[False] = ...) -> Series: - ... - - @overload - def drop_duplicates(self, keep, inplace: Literal[True]) -> None: + def drop_duplicates( + self, + keep: Literal["first", "last", False] = ..., + *, + inplace: Literal[False] = ..., + ) -> Series: ... @overload - def drop_duplicates(self, *, inplace: Literal[True]) -> None: + def drop_duplicates( + self, keep: Literal["first", "last", False] = ..., *, inplace: Literal[True] + ) -> None: ... @overload - def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None: + def drop_duplicates( + self, keep: Literal["first", "last", False] = ..., *, inplace: bool = ... + ) -> Series | None: ... @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates(self, keep="first", inplace=False) -> Series | None: + def drop_duplicates( + self, keep: Literal["first", "last", False] = "first", inplace=False + ) -> Series | None: """ Return Series with duplicate values removed. @@ -2297,7 +2306,7 @@ def drop_duplicates(self, keep="first", inplace=False) -> Series | None: else: return result - def duplicated(self, keep="first") -> Series: + def duplicated(self, keep: Literal["first", "last", False] = "first") -> Series: """ Indicate duplicate Series values. @@ -2377,7 +2386,7 @@ def duplicated(self, keep="first") -> Series: result = self._constructor(res, index=self.index) return result.__finalize__(self, method="duplicated") - def idxmin(self, axis=0, skipna=True, *args, **kwargs): + def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: """ Return the row label of the minimum value. @@ -2445,7 +2454,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs): return np.nan return self.index[i] - def idxmax(self, axis=0, skipna=True, *args, **kwargs): + def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: """ Return the row label of the maximum value. @@ -2514,7 +2523,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs): return np.nan return self.index[i] - def round(self, decimals=0, *args, **kwargs) -> Series: + def round(self, decimals: int = 0, *args, **kwargs) -> Series: """ Round each value in a Series to the given number of decimals. @@ -2639,7 +2648,13 @@ def quantile( # scalar return result.iloc[0] - def corr(self, other, method="pearson", min_periods=None) -> float: + def corr( + self, + other: Series, + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", + min_periods: int | None = None, + ) -> float: """ Compute correlation with `other` Series, excluding missing values. @@ -2847,7 +2862,7 @@ def diff(self, periods: int = 1) -> Series: self, method="diff" ) - def autocorr(self, lag=1) -> float: + def autocorr(self, lag: int = 1) -> float: """ Compute the lag-N autocorrelation. @@ -2892,7 +2907,7 @@ def autocorr(self, lag=1) -> float: """ return self.corr(self.shift(lag)) - def dot(self, other): + def dot(self, other: AnyArrayLike) -> Series | np.ndarray: """ Compute the dot product between the Series and the columns of other. @@ -3250,7 +3265,12 @@ def compare( result_names=result_names, ) - def combine(self, other, func, fill_value=None) -> Series: + def combine( + self, + other: Series | Hashable, + func: Callable[[Hashable, Hashable], Hashable], + fill_value: Hashable = None, + ) -> Series: """ Combine the Series with a Series or scalar according to `func`. @@ -3397,7 +3417,7 @@ def combine_first(self, other) -> Series: return this.where(notna(this), other) - def update(self, other) -> None: + def update(self, other: Series | Sequence | Mapping) -> None: """ Modify Series in place using values from passed Series. @@ -3925,7 +3945,12 @@ def sort_index( # type: ignore[override] key=key, ) - def argsort(self, axis=0, kind="quicksort", order=None) -> Series: + def argsort( + self, + axis: Axis = 0, + kind: SortKind = "quicksort", + order: None = None, + ) -> Series: """ Return the integer indices that would sort the Series values. @@ -3965,7 +3990,9 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series: res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) return res.__finalize__(self, method="argsort") - def nlargest(self, n=5, keep="first") -> Series: + def nlargest( + self, n: int = 5, keep: Literal["first", "last", "all"] = "first" + ) -> Series: """ Return the largest `n` elements. @@ -4220,7 +4247,7 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: dtype: object""" ), ) - def swaplevel(self, i=-2, j=-1, copy=True) -> Series: + def swaplevel(self, i: Level = -2, j: Level = -1, copy: bool = True) -> Series: """ Swap levels i and j in a :class:`MultiIndex`. @@ -4245,7 +4272,7 @@ def swaplevel(self, i=-2, j=-1, copy=True) -> Series: self, method="swaplevel" ) - def reorder_levels(self, order) -> Series: + def reorder_levels(self, order: Sequence[Level]) -> Series: """ Rearrange index levels using input order. @@ -4338,7 +4365,7 @@ def explode(self, ignore_index: bool = False) -> Series: return self._constructor(values, index=index, name=self.name) - def unstack(self, level=-1, fill_value=None) -> DataFrame: + def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame: """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. @@ -4387,7 +4414,11 @@ def unstack(self, level=-1, fill_value=None) -> DataFrame: # ---------------------------------------------------------------------- # function application - def map(self, arg, na_action=None) -> Series: + def map( + self, + arg: Callable | Mapping | Series, + na_action: Literal["ignore"] | None = None, + ) -> Series: """ Map values of Series according to an input mapping or function. @@ -4519,7 +4550,7 @@ def _gotitem(self, key, ndim, subset=None) -> Series: see_also=_agg_see_also_doc, examples=_agg_examples_doc, ) - def aggregate(self, func=None, axis=0, *args, **kwargs): + def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) @@ -4773,17 +4804,17 @@ def _needs_reindex_multi(self, axes, method, level) -> bool: ) def align( self, - other, - join="outer", - axis=None, - level=None, - copy=True, - fill_value=None, - method=None, - limit=None, - fill_axis=0, - broadcast_axis=None, - ): + other: Series, + join: Literal["outer", "inner", "left", "right"] = "outer", + axis: Axis | None = None, + level: Level = None, + copy: bool = True, + fill_value: Hashable = None, + method: FillnaOptions | None = None, + limit: int | None = None, + fill_axis: Axis = 0, + broadcast_axis: Axis | None = None, + ) -> Series: return super().align( other, join=join, @@ -5146,129 +5177,53 @@ def drop( # type: ignore[override] @overload def fillna( self, - value=..., + value: Hashable | Mapping | Series | DataFrame = ..., + *, method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: Literal[False] = ..., - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> Series: ... @overload def fillna( self, - value, - method: FillnaOptions | None, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value, - *, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, + value: Hashable | Mapping | Series | DataFrame = ..., *, - method: FillnaOptions | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - *, - method: FillnaOptions | None, - axis: Axis | None, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value, - *, - axis: Axis | None, + method: FillnaOptions | None = ..., + axis: Axis | None = ..., inplace: Literal[True], - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> None: ... @overload def fillna( self, - value, - method: FillnaOptions | None, + value: Hashable | Mapping | Series | DataFrame = ..., *, - inplace: Literal[True], - limit=..., - downcast=..., - ) -> None: - ... - - @overload - def fillna( - self, - value=..., method: FillnaOptions | None = ..., axis: Axis | None = ..., inplace: bool = ..., - limit=..., - downcast=..., + limit: int | None = ..., + downcast: dict | None = ..., ) -> Series | None: ... - # error: Cannot determine type of 'fillna' + # error: Signature of "fillna" incompatible with supertype "NDFrame" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) - @doc(NDFrame.fillna, **_shared_doc_kwargs) # type: ignore[has-type] - def fillna( + @doc(NDFrame.fillna, **_shared_doc_kwargs) + def fillna( # type: ignore[override] self, - value: object | ArrayLike | None = None, + value: Hashable | Mapping | Series | DataFrame = None, method: FillnaOptions | None = None, - axis=None, - inplace=False, - limit=None, - downcast=None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, ) -> Series | None: return super().fillna( value=value, @@ -5315,7 +5270,7 @@ def replace( *, inplace: Literal[False] = ..., limit: int | None = ..., - regex=..., + regex: bool = ..., method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., ) -> Series: ... @@ -5328,7 +5283,7 @@ def replace( *, inplace: Literal[True], limit: int | None = ..., - regex=..., + regex: bool = ..., method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., ) -> None: ... @@ -5349,7 +5304,7 @@ def replace( # type: ignore[override] value=lib.no_default, inplace: bool = False, limit: int | None = None, - regex=False, + regex: bool = False, method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, ) -> Series | None: return super().replace( @@ -5401,7 +5356,9 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit): # error: Cannot determine type of 'shift' @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type] - def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series: + def shift( + self, periods: int = 1, freq=None, axis: Axis = 0, fill_value: Hashable = None + ) -> Series: return super().shift( periods=periods, freq=freq, axis=axis, fill_value=fill_value ) @@ -5538,7 +5495,12 @@ def isin(self, values) -> Series: self, method="isin" ) - def between(self, left, right, inclusive="both") -> Series: + def between( + self, + left, + right, + inclusive: Literal["both", "neither", "left", "right"] = "both", + ) -> Series: """ Return boolean Series equivalent to left <= series <= right. @@ -5606,7 +5568,9 @@ def between(self, left, right, inclusive="both") -> Series: 3 False dtype: bool """ - if inclusive is True or inclusive is False: + # error: Non-overlapping identity check (left operand type: "Literal['both', + # 'neither', 'left', 'right']", right operand type: "Literal[False]") + if inclusive is True or inclusive is False: # type: ignore[comparison-overlap] warnings.warn( "Boolean inputs to the `inclusive` argument are deprecated in " "favour of `both` or `neither`.", @@ -5803,11 +5767,11 @@ def dropna( @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type] def asfreq( self, - freq, - method=None, + freq: Frequency, + method: FillnaOptions | None = None, how: str | None = None, normalize: bool = False, - fill_value=None, + fill_value: Hashable = None, ) -> Series: return super().asfreq( freq=freq, @@ -5822,15 +5786,15 @@ def asfreq( def resample( self, rule, - axis=0, + axis: Axis = 0, closed: str | None = None, label: str | None = None, convention: str = "start", kind: str | None = None, loffset=None, base: int | None = None, - on=None, - level=None, + on: Level = None, + level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, group_keys: bool | lib.NoDefault = no_default, @@ -5851,7 +5815,12 @@ def resample( group_keys=group_keys, ) - def to_timestamp(self, freq=None, how="start", copy=True) -> Series: + def to_timestamp( + self, + freq=None, + how: Literal["s", "e", "start", "end"] = "start", + copy: bool = True, + ) -> Series: """ Cast to DatetimeIndex of Timestamps, at *beginning* of period. @@ -5880,7 +5849,7 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> Series: self, method="to_timestamp" ) - def to_period(self, freq=None, copy=True) -> Series: + def to_period(self, freq: str | None = None, copy: bool = True) -> Series: """ Convert Series from DatetimeIndex to PeriodIndex. @@ -5914,7 +5883,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> Series: ... @@ -5925,7 +5894,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> None: ... @@ -5936,7 +5905,7 @@ def ffill( axis: None | Axis = ..., inplace: bool = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> Series | None: ... @@ -5947,7 +5916,7 @@ def ffill( # type: ignore[override] axis: None | Axis = None, inplace: bool = False, limit: None | int = None, - downcast=None, + downcast: dict | None = None, ) -> Series | None: return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) @@ -5958,7 +5927,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> Series: ... @@ -5969,7 +5938,7 @@ def bfill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> None: ... @@ -5980,7 +5949,7 @@ def bfill( axis: None | Axis = ..., inplace: bool = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> Series | None: ... @@ -5991,7 +5960,7 @@ def bfill( # type: ignore[override] axis: None | Axis = None, inplace: bool = False, limit: None | int = None, - downcast=None, + downcast: dict | None = None, ) -> Series | None: return super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) @@ -6039,10 +6008,10 @@ def where( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> Series: ... @@ -6053,10 +6022,10 @@ def where( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> None: ... @@ -6067,10 +6036,10 @@ def where( other=..., *, inplace: bool = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> Series | None: ... @@ -6084,10 +6053,10 @@ def where( # type: ignore[override] cond, other=lib.no_default, inplace: bool = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = lib.no_default, - try_cast=lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, ) -> Series | None: return super().where( cond, @@ -6105,10 +6074,10 @@ def mask( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> Series: ... @@ -6119,10 +6088,10 @@ def mask( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> None: ... @@ -6133,10 +6102,10 @@ def mask( other=..., *, inplace: bool = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> Series | None: ... @@ -6150,10 +6119,10 @@ def mask( # type: ignore[override] cond, other=np.nan, inplace: bool = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = lib.no_default, - try_cast=lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, ) -> Series | None: return super().mask( cond, From 3e45b417a5a62aa1487c5ffd332f8aa763cbd4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sat, 6 Aug 2022 13:13:33 -0400 Subject: [PATCH 2/4] and DataFrame --- pandas/core/frame.py | 268 +++++++++++++++++++++++++++-------------- pandas/core/generic.py | 4 +- 2 files changed, 180 insertions(+), 92 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index db1fab1ea6d2a..74b3e285edd75 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -68,6 +68,7 @@ Level, NaPosition, PythonFuncType, + QuantileInterpolation, ReadBuffer, Renamer, Scalar, @@ -1618,10 +1619,10 @@ def __rmatmul__(self, other) -> DataFrame: @classmethod def from_dict( cls, - data, + data: dict, orient: str = "columns", dtype: Dtype | None = None, - columns=None, + columns: Axes | None = None, ) -> DataFrame: """ Construct DataFrame from dict of array-like or dicts. @@ -1713,7 +1714,10 @@ def from_dict( if isinstance(list(data.values())[0], (Series, dict)): data = _from_nested_dict(data) else: - data, index = list(data.values()), list(data.keys()) + index = list(data.keys()) + # error: Incompatible types in assignment (expression has type + # "List[Any]", variable has type "Dict[Any, Any]") + data = list(data.values()) # type: ignore[assignment] elif orient == "columns" or orient == "tight": if columns is not None: raise ValueError(f"cannot use columns parameter with orient='{orient}'") @@ -1809,7 +1813,25 @@ def to_numpy( return result - def to_dict(self, orient: str = "dict", into=dict): + @overload + def to_dict( + self, + orient: Literal["dict", "list", "series", "split", "tight", "index"] = ..., + into: type[dict] = ..., + ) -> dict: + ... + + @overload + def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]: + ... + + def to_dict( + self, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + ) -> dict | list[dict]: """ Convert the DataFrame to a dictionary. @@ -1915,7 +1937,10 @@ def to_dict(self, orient: str = "dict", into=dict): # GH16122 into_c = com.standardize_mapping(into) - orient = orient.lower() + # error: Incompatible types in assignment (expression has type "str", + # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', + # 'records', 'index']") + orient = orient.lower() # type: ignore[assignment] # GH32515 if orient.startswith(("d", "l", "s", "r", "i")) and orient not in { "dict", @@ -2333,7 +2358,7 @@ def maybe_reorder( return cls(mgr) def to_records( - self, index=True, column_dtypes=None, index_dtypes=None + self, index: bool = True, column_dtypes=None, index_dtypes=None ) -> np.recarray: """ Convert DataFrame to a NumPy record array. @@ -2442,7 +2467,7 @@ def to_records( formats = [] for i, v in enumerate(arrays): - index = i + index_int = i # When the names and arrays are collected, we # first collect those in the DataFrame's index, @@ -2453,13 +2478,13 @@ def to_records( # # This check allows us to see whether we are # handling a name / array in the index or column. - if index < index_len: + if index_int < index_len: dtype_mapping = index_dtypes - name = index_names[index] + name = index_names[index_int] else: - index -= index_len + index_int -= index_len dtype_mapping = column_dtypes - name = self.columns[index] + name = self.columns[index_int] # We have a dictionary, so we get the data type # associated with the index or column (which can @@ -2469,8 +2494,8 @@ def to_records( if is_dict_like(dtype_mapping): if name in dtype_mapping: dtype_mapping = dtype_mapping[name] - elif index in dtype_mapping: - dtype_mapping = dtype_mapping[index] + elif index_int in dtype_mapping: + dtype_mapping = dtype_mapping[index_int] else: dtype_mapping = None @@ -4982,14 +5007,14 @@ def _reindex_multi( @doc(NDFrame.align, **_shared_doc_kwargs) def align( self, - other, + other: DataFrame, join: Literal["outer", "inner", "left", "right"] = "outer", axis: Axis | None = None, - level: Level | None = None, + level: Level = None, copy: bool = True, fill_value=None, method: FillnaOptions | None = None, - limit=None, + limit: int | None = None, fill_axis: Axis = 0, broadcast_axis: Axis | None = None, ) -> DataFrame: @@ -5022,6 +5047,7 @@ def set_axis( ) -> DataFrame | None: ... + # error: Signature of "set_axis" incompatible with supertype "NDFrame" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) @Appender( """ @@ -5062,7 +5088,9 @@ def set_axis( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): + def set_axis( # type: ignore[override] + self, labels, axis: Axis = 0, inplace: bool = False + ) -> DataFrame | None: return super().set_axis(labels, axis=axis, inplace=inplace) @Substitution(**_shared_doc_kwargs) @@ -5094,7 +5122,7 @@ def drop( axis: Axis = ..., index: IndexLabel = ..., columns: IndexLabel = ..., - level: Level | None = ..., + level: Level = ..., inplace: Literal[True], errors: IgnoreRaise = ..., ) -> None: @@ -5108,7 +5136,7 @@ def drop( axis: Axis = ..., index: IndexLabel = ..., columns: IndexLabel = ..., - level: Level | None = ..., + level: Level = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., ) -> DataFrame: @@ -5122,7 +5150,7 @@ def drop( axis: Axis = ..., index: IndexLabel = ..., columns: IndexLabel = ..., - level: Level | None = ..., + level: Level = ..., inplace: bool = ..., errors: IgnoreRaise = ..., ) -> DataFrame | None: @@ -5137,7 +5165,7 @@ def drop( # type: ignore[override] axis: Axis = 0, index: IndexLabel = None, columns: IndexLabel = None, - level: Level | None = None, + level: Level = None, inplace: bool = False, errors: IgnoreRaise = "raise", ) -> DataFrame | None: @@ -5298,7 +5326,7 @@ def rename( axis: Axis | None = ..., copy: bool = ..., inplace: Literal[True], - level: Level | None = ..., + level: Level = ..., errors: IgnoreRaise = ..., ) -> None: ... @@ -5313,7 +5341,7 @@ def rename( axis: Axis | None = ..., copy: bool = ..., inplace: Literal[False] = ..., - level: Level | None = ..., + level: Level = ..., errors: IgnoreRaise = ..., ) -> DataFrame: ... @@ -5328,7 +5356,7 @@ def rename( axis: Axis | None = ..., copy: bool = ..., inplace: bool = ..., - level: Level | None = ..., + level: Level = ..., errors: IgnoreRaise = ..., ) -> DataFrame | None: ... @@ -5342,7 +5370,7 @@ def rename( axis: Axis | None = None, copy: bool = True, inplace: bool = False, - level: Level | None = None, + level: Level = None, errors: IgnoreRaise = "ignore", ) -> DataFrame | None: """ @@ -5653,10 +5681,10 @@ def _replace_columnwise( @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( self, - periods=1, + periods: int = 1, freq: Frequency | None = None, axis: Axis = 0, - fill_value=lib.no_default, + fill_value: Hashable = lib.no_default, ) -> DataFrame: axis = self._get_axis_number(axis) @@ -6747,7 +6775,7 @@ def f(vals) -> tuple[np.ndarray, int]: @overload # type: ignore[override] def sort_values( self, - by, + by: IndexLabel, *, axis: Axis = ..., ascending=..., @@ -6762,7 +6790,7 @@ def sort_values( @overload def sort_values( self, - by, + by: IndexLabel, *, axis: Axis = ..., ascending=..., @@ -6781,9 +6809,9 @@ def sort_values( @Appender(NDFrame.sort_values.__doc__) def sort_values( # type: ignore[override] self, - by, + by: IndexLabel, axis: Axis = 0, - ascending=True, + ascending: bool | list[bool] | tuple[bool, ...] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -6795,9 +6823,17 @@ def sort_values( # type: ignore[override] ascending = validate_ascending(ascending) if not isinstance(by, list): by = [by] - if is_sequence(ascending) and len(by) != len(ascending): + # error: Argument 1 to "len" has incompatible type "Union[bool, List[bool]]"; + # expected "Sized" + if is_sequence(ascending) and ( + len(by) != len(ascending) # type: ignore[arg-type] + ): + # error: Argument 1 to "len" has incompatible type "Union[bool, + # List[bool]]"; expected "Sized" raise ValueError( - f"Length of ascending ({len(ascending)}) != length of by ({len(by)})" + "Length of ascending (" + f"{len(ascending)}) " # type: ignore[arg-type] + f"!= length of by ({len(by)})" ) if len(by) > 1: @@ -6856,7 +6892,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: Level = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[True], kind: SortKind = ..., @@ -6872,7 +6908,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: Level = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[False] = ..., kind: SortKind = ..., @@ -6888,7 +6924,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: Level = ..., ascending: bool | Sequence[bool] = ..., inplace: bool = ..., kind: SortKind = ..., @@ -6904,7 +6940,7 @@ def sort_index( def sort_index( # type: ignore[override] self, axis: Axis = 0, - level: Level | None = None, + level: Level = None, ascending: bool | Sequence[bool] = True, inplace: bool = False, kind: SortKind = "quicksort", @@ -7766,7 +7802,11 @@ def compare( ) def combine( - self, other: DataFrame, func, fill_value=None, overwrite: bool = True + self, + other: DataFrame, + func: Callable[[Series, Series], Series | Hashable], + fill_value=None, + overwrite: bool = True, ) -> DataFrame: """ Perform column-wise combine with another DataFrame. @@ -7928,7 +7968,11 @@ def combine( if isinstance(new_dtype, np.dtype): # if new_dtype is an EA Dtype, then `func` is expected to return # the correct dtype without any additional casting - arr = maybe_downcast_to_dtype(arr, new_dtype) + # error: No overload variant of "maybe_downcast_to_dtype" matches + # argument types "Union[Series, Hashable]", "dtype[Any]" + arr = maybe_downcast_to_dtype( # type: ignore[call-overload] + arr, new_dtype + ) result[col] = arr @@ -9019,7 +9063,7 @@ def melt( value_vars=None, var_name=None, value_name="value", - col_level: Level | None = None, + col_level: Level = None, ignore_index: bool = True, ) -> DataFrame: @@ -9287,7 +9331,7 @@ def any( axis: Axis = 0, bool_only: bool | None = None, skipna: bool = True, - level: Level | None = None, + level: Level = None, **kwargs, ) -> DataFrame | Series: ... @@ -9312,7 +9356,7 @@ def apply( func: AggFuncType, axis: Axis = 0, raw: bool = False, - result_type=None, + result_type: Literal["expand", "reduce", "broadcast"] | None = None, args=(), **kwargs, ): @@ -10379,10 +10423,11 @@ def cov( def corrwith( self, - other, + other: DataFrame | Series, axis: Axis = 0, - drop=False, - method="pearson", + drop: bool = False, + method: Literal["pearson", "kendall", "spearman"] + | Callable[[np.ndarray, np.ndarray], float] = "pearson", numeric_only: bool | lib.NoDefault = lib.no_default, ) -> Series: """ @@ -10550,9 +10595,7 @@ def c(x): # ---------------------------------------------------------------------- # ndarray-like stats methods - def count( - self, axis: Axis = 0, level: Level | None = None, numeric_only: bool = False - ): + def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False): """ Count non-NA cells for each column or row. @@ -11068,13 +11111,43 @@ def f(s): return data + @overload + def quantile( + self, + q: float = ..., + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series: + ... + + @overload def quantile( self, - q=0.5, + q: AnyArrayLike | Sequence[float], + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series | DataFrame: + ... + + @overload + def quantile( + self, + q: float | AnyArrayLike | Sequence[float] = ..., + axis: Axis = ..., + numeric_only: bool | lib.NoDefault = ..., + interpolation: QuantileInterpolation = ..., + ) -> Series | DataFrame: + ... + + def quantile( + self, + q: float | AnyArrayLike | Sequence[float] = 0.5, axis: Axis = 0, numeric_only: bool | lib.NoDefault = no_default, - interpolation: str = "linear", - ): + interpolation: QuantileInterpolation = "linear", + ) -> Series | DataFrame: """ Return values at the given quantile over requested axis. @@ -11154,8 +11227,14 @@ def quantile( if not is_list_like(q): # BlockManager.quantile expects listlike, so we wrap and unwrap here + # error: List item 0 has incompatible type "Union[float, Union[Union[ + # ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]"; + # expected "float" res_df = self.quantile( - [q], axis=axis, numeric_only=numeric_only, interpolation=interpolation + [q], # type: ignore[list-item] + axis=axis, + numeric_only=numeric_only, + interpolation=interpolation, ) res = res_df.iloc[0] if axis == 1 and len(self) == 0: @@ -11185,7 +11264,11 @@ def quantile( res = self._constructor([], index=q, columns=cols, dtype=dtype) return res.__finalize__(self, method="quantile") - res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation) + # error: Argument "qs" to "quantile" of "BlockManager" has incompatible type + # "Index"; expected "Float64Index" + res = data._mgr.quantile( + qs=q, axis=1, interpolation=interpolation # type: ignore[arg-type] + ) result = self._constructor(res) return result.__finalize__(self, method="quantile") @@ -11194,10 +11277,10 @@ def quantile( def asfreq( self, freq: Frequency, - method=None, + method: FillnaOptions | None = None, how: str | None = None, normalize: bool = False, - fill_value=None, + fill_value: Hashable = None, ) -> DataFrame: return super().asfreq( freq=freq, @@ -11211,15 +11294,15 @@ def asfreq( def resample( self, rule, - axis=0, + axis: Axis = 0, closed: str | None = None, label: str | None = None, convention: str = "start", kind: str | None = None, loffset=None, base: int | None = None, - on=None, - level=None, + on: Level = None, + level: Level = None, origin: str | TimestampConvertibleTypes = "start_day", offset: TimedeltaConvertibleTypes | None = None, group_keys: bool | lib.NoDefault = no_default, @@ -11334,7 +11417,7 @@ def to_period( setattr(new_obj, axis_name, new_ax) return new_obj - def isin(self, values) -> DataFrame: + def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: """ Whether each element in the DataFrame is contained in values. @@ -11429,8 +11512,13 @@ def isin(self, values) -> DataFrame: "to be passed to DataFrame.isin(), " f"you passed a '{type(values).__name__}'" ) + # error: Argument 2 to "isin" has incompatible type "Union[Sequence[Any], + # Mapping[Any, Any]]"; expected "Union[Union[ExtensionArray, + # ndarray[Any, Any]], Index, Series]" result = self._constructor( - algorithms.isin(self.values.ravel(), values).reshape(self.shape), + algorithms.isin( + self.values.ravel(), values # type: ignore[arg-type] + ).reshape(self.shape), self.index, self.columns, ) @@ -11575,7 +11663,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[False] = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> DataFrame: ... @@ -11586,7 +11674,7 @@ def ffill( axis: None | Axis = ..., inplace: Literal[True], limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> None: ... @@ -11597,7 +11685,7 @@ def ffill( axis: None | Axis = ..., inplace: bool = ..., limit: None | int = ..., - downcast=..., + downcast: dict | None = ..., ) -> DataFrame | None: ... @@ -11608,7 +11696,7 @@ def ffill( # type: ignore[override] axis: None | Axis = None, inplace: bool = False, limit: None | int = None, - downcast=None, + downcast: dict | None = None, ) -> DataFrame | None: return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) @@ -11661,8 +11749,8 @@ def bfill( # type: ignore[override] ) def clip( self: DataFrame, - lower=None, - upper=None, + lower: float | None = None, + upper: float | None = None, axis: Axis | None = None, inplace: bool = False, *args, @@ -11700,10 +11788,10 @@ def where( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> DataFrame: ... @@ -11714,10 +11802,10 @@ def where( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> None: ... @@ -11728,10 +11816,10 @@ def where( other=..., *, inplace: bool = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> DataFrame | None: ... @@ -11745,10 +11833,10 @@ def where( # type: ignore[override] cond, other=lib.no_default, inplace: bool = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = "raise", - try_cast=lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | None: return super().where( cond, @@ -11766,10 +11854,10 @@ def mask( other=..., *, inplace: Literal[False] = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> DataFrame: ... @@ -11780,10 +11868,10 @@ def mask( other=..., *, inplace: Literal[True], - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> None: ... @@ -11794,10 +11882,10 @@ def mask( other=..., *, inplace: bool = ..., - axis=..., - level=..., + axis: Axis | None = ..., + level: Level = ..., errors: IgnoreRaise | lib.NoDefault = ..., - try_cast=..., + try_cast: bool | lib.NoDefault = ..., ) -> DataFrame | None: ... @@ -11811,10 +11899,10 @@ def mask( # type: ignore[override] cond, other=np.nan, inplace: bool = False, - axis=None, - level=None, + axis: Axis | None = None, + level: Level = None, errors: IgnoreRaise | lib.NoDefault = "raise", - try_cast=lib.no_default, + try_cast: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | None: return super().mask( cond, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 17be8f693f3d0..22bcc6dfd743a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1742,7 +1742,7 @@ def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None: raise ValueError(msg) @final - def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: + def _get_label_or_level_values(self, key: Level, axis: int = 0) -> np.ndarray: """ Return a 1-D array of values associated with `key`, a label or level from the given `axis`. @@ -6586,7 +6586,7 @@ def fillna( @overload def fillna( - self: NDFrameT, + self, value: Hashable | Mapping | Series | DataFrame = ..., *, method: FillnaOptions | None = ..., From 1f50233f81357c4d3f04bfe10d571642ba372706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 7 Aug 2022 09:41:14 -0400 Subject: [PATCH 3/4] more compatibility with pandas-stub tests --- pandas/_testing/asserters.py | 4 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/interval.py | 2 +- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/frame.py | 59 ++++++------------------------ pandas/core/generic.py | 14 +++---- pandas/core/groupby/groupby.py | 4 +- pandas/core/indexes/base.py | 2 +- pandas/core/series.py | 34 +++++++++++------ pandas/io/excel/_base.py | 2 +- pandas/io/formats/format.py | 2 +- 11 files changed, 52 insertions(+), 75 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index c7924dc451752..53c7273bfb552 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -865,7 +865,7 @@ def assert_series_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type="equiv", + check_index_type: bool | Literal["equiv"] = "equiv", check_series_type=True, check_less_precise: bool | int | NoDefault = no_default, check_names=True, @@ -1133,7 +1133,7 @@ def assert_frame_equal( left, right, check_dtype: bool | Literal["equiv"] = True, - check_index_type="equiv", + check_index_type: bool | Literal["equiv"] = "equiv", check_column_type="equiv", check_frame_type=True, check_less_precise=no_default, diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6c9b7adadb7b0..e0749feb49c7d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -985,7 +985,7 @@ def equals(self, other: object) -> bool: equal_na = self.isna() & other.isna() # type: ignore[operator] return bool((equal_values | equal_na).all()) - def isin(self, values) -> np.ndarray: + def isin(self, values) -> npt.NDArray[np.bool_]: """ Pointwise comparison for set containment in the given values. diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index e7198a95c07f1..5086d38c4545a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1737,7 +1737,7 @@ def contains(self, other): other < self._right if self.open_right else other <= self._right ) - def isin(self, values) -> np.ndarray: + def isin(self, values) -> npt.NDArray[np.bool_]: if not hasattr(values, "dtype"): values = np.array(values) values = extract_array(values, extract_numpy=True) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index caddd12a2c2b4..9e2cbd86e83a8 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -202,7 +202,7 @@ def _maybe_convert_setitem_value(self, value): raise ValueError("Scalar must be NA or str") return value - def isin(self, values): + def isin(self, values) -> npt.NDArray[np.bool_]: if pa_version_under2p0: fallback_performancewarning(version="2") return super().isin(values) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74b3e285edd75..e32dd768dd2b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5996,7 +5996,8 @@ def set_index( @overload def reset_index( self, - level: Hashable | Sequence[Hashable] | None = ..., + level: IndexLabel = ..., + *, drop: bool = ..., inplace: Literal[False] = ..., col_level: Hashable = ..., @@ -6009,34 +6010,9 @@ def reset_index( @overload def reset_index( self, - level: Hashable | Sequence[Hashable] | None, - drop: bool, - inplace: Literal[True], - col_level: Hashable = ..., - col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, - ) -> None: - ... - - @overload - def reset_index( - self, - *, - drop: bool, - inplace: Literal[True], - col_level: Hashable = ..., - col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, - ) -> None: - ... - - @overload - def reset_index( - self, - level: Hashable | Sequence[Hashable] | None, + level: IndexLabel = ..., *, + drop: bool = ..., inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., @@ -6048,19 +6024,8 @@ def reset_index( @overload def reset_index( self, + level: IndexLabel = ..., *, - inplace: Literal[True], - col_level: Hashable = ..., - col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, - ) -> None: - ... - - @overload - def reset_index( - self, - level: Hashable | Sequence[Hashable] | None = ..., drop: bool = ..., inplace: bool = ..., col_level: Hashable = ..., @@ -6073,7 +6038,7 @@ def reset_index( @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) def reset_index( self, - level: Hashable | Sequence[Hashable] | None = None, + level: IndexLabel = None, drop: bool = False, inplace: bool = False, col_level: Hashable = 0, @@ -6532,7 +6497,7 @@ def dropna( def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, - keep: Literal["first"] | Literal["last"] | Literal[False] = "first", + keep: Literal["first", "last", False] = "first", inplace: bool = False, ignore_index: bool = False, ) -> DataFrame | None: @@ -6629,7 +6594,7 @@ def drop_duplicates( def duplicated( self, subset: Hashable | Sequence[Hashable] | None = None, - keep: Literal["first"] | Literal["last"] | Literal[False] = "first", + keep: Literal["first", "last", False] = "first", ) -> Series: """ Return boolean Series denoting duplicate rows. @@ -6892,7 +6857,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[True], kind: SortKind = ..., @@ -6908,7 +6873,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[False] = ..., kind: SortKind = ..., @@ -6924,7 +6889,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: bool = ..., kind: SortKind = ..., @@ -6940,7 +6905,7 @@ def sort_index( def sort_index( # type: ignore[override] self, axis: Axis = 0, - level: Level = None, + level: IndexLabel = None, ascending: bool | Sequence[bool] = True, inplace: bool = False, kind: SortKind = "quicksort", diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 22bcc6dfd743a..38db81cd8a86d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3192,7 +3192,7 @@ def to_latex( multicolumn: bool_t | None = ..., multicolumn_format: str | None = ..., multirow: bool_t | None = ..., - caption: str | None = ..., + caption: str | tuple[str, str] | None = ..., label: str | None = ..., position: str | None = ..., ) -> str: @@ -3220,7 +3220,7 @@ def to_latex( multicolumn: bool_t | None = ..., multicolumn_format: str | None = ..., multirow: bool_t | None = ..., - caption: str | None = ..., + caption: str | tuple[str, str] | None = ..., label: str | None = ..., position: str | None = ..., ) -> None: @@ -3249,7 +3249,7 @@ def to_latex( multicolumn: bool_t | None = None, multicolumn_format: str | None = None, multirow: bool_t | None = None, - caption: str | None = None, + caption: str | tuple[str, str] | None = None, label: str | None = None, position: str | None = None, ) -> str | None: @@ -4882,7 +4882,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool_t | Sequence[bool_t] = ..., inplace: Literal[True], kind: SortKind = ..., @@ -4898,7 +4898,7 @@ def sort_index( self: NDFrameT, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool_t | Sequence[bool_t] = ..., inplace: Literal[False] = ..., kind: SortKind = ..., @@ -4914,7 +4914,7 @@ def sort_index( self: NDFrameT, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool_t | Sequence[bool_t] = ..., inplace: bool_t = ..., kind: SortKind = ..., @@ -4928,7 +4928,7 @@ def sort_index( def sort_index( self: NDFrameT, axis: Axis = 0, - level: Level | None = None, + level: IndexLabel = None, ascending: bool_t | Sequence[bool_t] = True, inplace: bool_t = False, kind: SortKind = "quicksort", diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8e0ed959fabc3..329ee57bf6d76 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2401,7 +2401,9 @@ def size(self) -> DataFrame | Series: result = self._obj_1d_constructor(result) if not self.as_index: - result = result.rename("size").reset_index() + # error: Incompatible types in assignment (expression has + # type "DataFrame", variable has type "Series") + result = result.rename("size").reset_index() # type: ignore[assignment] return self._reindex_output(result, fill_value=0) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ffb222c61f6ab..faa65554a455e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6425,7 +6425,7 @@ def _transform_index(self, func, *, level=None) -> Index: items = [func(x) for x in self] return Index(items, name=self.name, tupleize_cols=False) - def isin(self, values, level=None) -> np.ndarray: + def isin(self, values, level=None) -> npt.NDArray[np.bool_]: """ Return a boolean array where the index values are in `values`. diff --git a/pandas/core/series.py b/pandas/core/series.py index dab90ce7edf40..61fb12eeaeff2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1380,9 +1380,21 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: @overload def reset_index( self, - level: Level = ..., + level: IndexLabel = ..., *, - drop: bool = ..., + drop: Literal[False] = ..., + name: Level = ..., + inplace: Literal[False] = ..., + allow_duplicates: bool = ..., + ) -> DataFrame: + ... + + @overload + def reset_index( + self, + level: IndexLabel = ..., + *, + drop: Literal[True], name: Level = ..., inplace: Literal[False] = ..., allow_duplicates: bool = ..., @@ -1392,7 +1404,7 @@ def reset_index( @overload def reset_index( self, - level: Level = ..., + level: IndexLabel = ..., *, drop: bool = ..., name: Level = ..., @@ -1404,12 +1416,12 @@ def reset_index( @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) def reset_index( self, - level: Level = None, + level: IndexLabel = None, drop: bool = False, name: Level = lib.no_default, inplace: bool = False, allow_duplicates: bool = False, - ) -> Series | None: + ) -> DataFrame | Series | None: """ Generate a new DataFrame or Series with the index reset. @@ -1554,9 +1566,7 @@ def reset_index( name = self.name df = self.to_frame(name) - # error: Incompatible return value type (got "DataFrame", expected - # "Optional[Series]") - return df.reset_index( # type: ignore[return-value] + return df.reset_index( level=level, drop=drop, allow_duplicates=allow_duplicates ) return None @@ -3741,7 +3751,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[True], kind: SortKind = ..., @@ -3757,7 +3767,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: Literal[False] = ..., kind: SortKind = ..., @@ -3773,7 +3783,7 @@ def sort_index( self, *, axis: Axis = ..., - level: Level | None = ..., + level: IndexLabel = ..., ascending: bool | Sequence[bool] = ..., inplace: bool = ..., kind: SortKind = ..., @@ -3789,7 +3799,7 @@ def sort_index( def sort_index( # type: ignore[override] self, axis: Axis = 0, - level: Level | None = None, + level: IndexLabel = None, ascending: bool | Sequence[bool] = True, inplace: bool = False, kind: SortKind = "quicksort", diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 44152f100d390..b7d334737abbd 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -357,7 +357,7 @@ def read_excel( io, # sheet name is str or int -> DataFrame - sheet_name: str | int, + sheet_name: str | int = ..., header: int | Sequence[int] | None = ..., names: list[str] | None = ..., index_col: int | Sequence[int] | None = ..., diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 98219ef5eea36..27094fff5f812 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1036,7 +1036,7 @@ def to_latex( multicolumn: bool = False, multicolumn_format: str | None = None, multirow: bool = False, - caption: str | None = None, + caption: str | tuple[str, str] | None = None, label: str | None = None, position: str | None = None, ) -> str | None: From d8c09673a50f0b2b22a0ddf480e3468eac0cc538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 15 Aug 2022 15:25:58 -0400 Subject: [PATCH 4/4] mypy address line-off-by-one (merge?) issue --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a7cd5c9206d48..6cfca4ebdc612 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6814,9 +6814,8 @@ def sort_values( # type: ignore[override] # error: Argument 1 to "len" has incompatible type "Union[bool, # List[bool]]"; expected "Sized" raise ValueError( - "Length of ascending (" - f"{len(ascending)}) " # type: ignore[arg-type] - f"!= length of by ({len(by)})" + f"Length of ascending ({len(ascending)})" # type: ignore[arg-type] + f" != length of by ({len(by)})" ) if len(by) > 1: