diff --git a/pandas/_typing.py b/pandas/_typing.py index 9465631e9fe20..85a69f38b3f67 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -118,6 +118,7 @@ Concatenate: Any = None HashableT = TypeVar("HashableT", bound=Hashable) +HashableT2 = TypeVar("HashableT2", bound=Hashable) MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping) # array-like diff --git a/pandas/core/base.py b/pandas/core/base.py index b9a57de073595..a3a86cb9ce410 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,7 +8,6 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Generic, Literal, cast, @@ -105,7 +104,7 @@ class PandasObject(DirNamesMixin): _cache: dict[str, Any] @property - def _constructor(self) -> Callable[..., Self]: + def _constructor(self) -> type[Self]: """ Class constructor (for this class it's just `__class__`). """ @@ -1356,7 +1355,7 @@ def searchsorted( sorter=sorter, ) - def drop_duplicates(self, *, keep: DropKeep = "first"): + def drop_duplicates(self, *, keep: DropKeep = "first") -> Self: duplicated = self._duplicated(keep=keep) # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 660979540691e..6142e67ec1316 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -217,6 +217,8 @@ FormattersType, Frequency, FromDictOrient, + HashableT, + HashableT2, IgnoreRaise, IndexKeyFunc, IndexLabel, @@ -239,6 +241,7 @@ SortKind, StorageOptions, Suffixes, + T, ToStataByteorder, ToTimestampHow, UpdateJoin, @@ -643,10 +646,10 @@ class DataFrame(NDFrame, OpsMixin): __pandas_priority__ = 4000 @property - def _constructor(self) -> Callable[..., DataFrame]: + def _constructor(self) -> type[DataFrame]: return DataFrame - def _constructor_from_mgr(self, mgr, axes): + def _constructor_from_mgr(self, mgr, axes) -> DataFrame: if self._constructor is DataFrame: # we are pandas.DataFrame (or a subclass that doesn't override _constructor) return DataFrame._from_mgr(mgr, axes=axes) @@ -659,7 +662,7 @@ def _constructor_from_mgr(self, mgr, axes): def _sliced_from_mgr(self, mgr, axes) -> Series: return Series._from_mgr(mgr, axes) - def _constructor_sliced_from_mgr(self, mgr, axes): + def _constructor_sliced_from_mgr(self, mgr, axes) -> Series: if self._constructor_sliced is Series: ser = self._sliced_from_mgr(mgr, axes) ser._name = None # caller is responsible for setting real name @@ -1353,7 +1356,7 @@ def _get_values_for_csv( decimal: str, na_rep: str, quoting, # int csv.QUOTE_FOO from stdlib - ) -> Self: + ) -> DataFrame: # helper used by to_csv mgr = self._mgr.get_values_for_csv( float_format=float_format, @@ -1831,7 +1834,7 @@ def from_dict( a b 1 3 c 2 4 """ - index = None + index: list | Index | None = None orient = orient.lower() # type: ignore[assignment] if orient == "index": if len(data) > 0: @@ -1857,7 +1860,7 @@ def from_dict( else: realdata = data["data"] - def create_index(indexlist, namelist): + def create_index(indexlist, namelist) -> Index: index: Index if len(namelist) > 1: index = MultiIndex.from_tuples(indexlist, names=namelist) @@ -2700,6 +2703,42 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: to_feather(self, path, **kwargs) + @overload + def to_markdown( + self, + buf: None = ..., + *, + mode: str = ..., + index: bool = ..., + storage_options: StorageOptions | None = ..., + **kwargs, + ) -> str: + ... + + @overload + def to_markdown( + self, + buf: FilePath | WriteBuffer[str], + *, + mode: str = ..., + index: bool = ..., + storage_options: StorageOptions | None = ..., + **kwargs, + ) -> None: + ... + + @overload + def to_markdown( + self, + buf: FilePath | WriteBuffer[str] | None, + *, + mode: str = ..., + index: bool = ..., + storage_options: StorageOptions | None = ..., + **kwargs, + ) -> str | None: + ... + @doc( Series.to_markdown, klass=_shared_doc_kwargs["klass"], @@ -2881,6 +2920,39 @@ def to_parquet( **kwargs, ) + @overload + def to_orc( + self, + path: None = ..., + *, + engine: Literal["pyarrow"] = ..., + index: bool | None = ..., + engine_kwargs: dict[str, Any] | None = ..., + ) -> bytes: + ... + + @overload + def to_orc( + self, + path: FilePath | WriteBuffer[bytes], + *, + engine: Literal["pyarrow"] = ..., + index: bool | None = ..., + engine_kwargs: dict[str, Any] | None = ..., + ) -> None: + ... + + @overload + def to_orc( + self, + path: FilePath | WriteBuffer[bytes] | None, + *, + engine: Literal["pyarrow"] = ..., + index: bool | None = ..., + engine_kwargs: dict[str, Any] | None = ..., + ) -> bytes | None: + ... + def to_orc( self, path: FilePath | WriteBuffer[bytes] | None = None, @@ -4027,7 +4099,7 @@ def _setitem_slice(self, key: slice, value) -> None: # backwards-compat, xref GH#31469 self.iloc[key] = value - def _setitem_array(self, key, value): + def _setitem_array(self, key, value) -> None: # also raises Exception if object array with NA values if com.is_bool_indexer(key): # bool indexer is indexing along rows @@ -4061,7 +4133,7 @@ def _setitem_array(self, key, value): elif np.ndim(value) > 1: # list of lists value = DataFrame(value).values - return self._setitem_array(key, value) + self._setitem_array(key, value) else: self._iset_not_inplace(key, value) @@ -4595,7 +4667,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: return _eval(expr, inplace=inplace, **kwargs) - def select_dtypes(self, include=None, exclude=None) -> Self: + def select_dtypes(self, include=None, exclude=None) -> DataFrame: """ Return a subset of the DataFrame's columns based on the column dtypes. @@ -5474,9 +5546,21 @@ def pop(self, item: Hashable) -> Series: """ return super().pop(item=item) + @overload + def _replace_columnwise( + self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[True], regex + ) -> None: + ... + + @overload + def _replace_columnwise( + self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[False], regex + ) -> Self: + ... + def _replace_columnwise( self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex - ): + ) -> Self | None: """ Dispatch to Series.replace column-wise. @@ -5505,7 +5589,7 @@ def _replace_columnwise( res._iset_item(i, newobj, inplace=inplace) if inplace: - return + return None return res.__finalize__(self) @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) @@ -11815,19 +11899,19 @@ def kurt( product = prod @doc(make_doc("cummin", ndim=2)) - def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cummin(self, axis, skipna, *args, **kwargs) @doc(make_doc("cummax", ndim=2)) - def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cummax(self, axis, skipna, *args, **kwargs) @doc(make_doc("cumsum", ndim=2)) - def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) @doc(make_doc("cumprod", 2)) - def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: @@ -12710,8 +12794,12 @@ def values(self) -> np.ndarray: return self._mgr.as_array() -def _from_nested_dict(data) -> collections.defaultdict: - new_data: collections.defaultdict = collections.defaultdict(dict) +def _from_nested_dict( + data: Mapping[HashableT, Mapping[HashableT2, T]], +) -> collections.defaultdict[HashableT2, dict[HashableT, T]]: + new_data: collections.defaultdict[ + HashableT2, dict[HashableT, T] + ] = collections.defaultdict(dict) for index, s in data.items(): for col, v in s.items(): new_data[col][index] = v diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 93e4468f91edd..149fbf7feb0dd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -478,8 +478,9 @@ def _validate_dtype(cls, dtype) -> DtypeObj | None: # ---------------------------------------------------------------------- # Construction + # error: Signature of "_constructor" incompatible with supertype "PandasObject" @property - def _constructor(self) -> Callable[..., Self]: + def _constructor(self) -> Callable[..., Self]: # type: ignore[override] """ Used when a manipulation result has the same dimensions as the original. @@ -495,7 +496,9 @@ def _constructor(self) -> Callable[..., Self]: _AXIS_LEN: int @final - def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs): + def _construct_axes_dict( + self, axes: Sequence[Axis] | None = None, **kwargs: AxisInt + ) -> dict: """Return an axes dictionary for myself.""" d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} # error: Argument 1 to "update" of "MutableMapping" has incompatible type @@ -719,14 +722,26 @@ def set_axis( """ return self._set_axis_nocheck(labels, axis, inplace=False) + @overload + def _set_axis_nocheck(self, labels, axis: Axis, inplace: Literal[False]) -> Self: + ... + + @overload + def _set_axis_nocheck(self, labels, axis: Axis, inplace: Literal[True]) -> None: + ... + + @overload + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool) -> Self | None: + ... + @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool): + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool) -> Self | None: if inplace: setattr(self, self._get_axis_name(axis), labels) - else: - obj = self.copy(deep=False) - setattr(obj, obj._get_axis_name(axis), labels) - return obj + return None + obj = self.copy(deep=False) + setattr(obj, obj._get_axis_name(axis), labels) + return obj @final def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: @@ -926,6 +941,51 @@ def squeeze(self, axis: Axis | None = None): # ---------------------------------------------------------------------- # Rename + @overload + def _rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: Literal[False] = ..., + level: Level | None = ..., + errors: str = ..., + ) -> Self: + ... + + @overload + def _rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: Literal[True], + level: Level | None = ..., + errors: str = ..., + ) -> None: + ... + + @overload + def _rename( + self, + mapper: Renamer | None = ..., + *, + index: Renamer | None = ..., + columns: Renamer | None = ..., + axis: Axis | None = ..., + copy: bool | None = ..., + inplace: bool, + level: Level | None = ..., + errors: str = ..., + ) -> Self | None: + ... + @final def _rename( self, @@ -1203,8 +1263,24 @@ class name return result return None + @overload + def _set_axis_name( + self, name, axis: Axis = ..., *, inplace: Literal[False] = ... + ) -> Self: + ... + + @overload + def _set_axis_name(self, name, axis: Axis = ..., *, inplace: Literal[True]) -> None: + ... + + @overload + def _set_axis_name(self, name, axis: Axis = ..., *, inplace: bool) -> Self | None: + ... + @final - def _set_axis_name(self, name, axis: Axis = 0, inplace: bool = False): + def _set_axis_name( + self, name, axis: Axis = 0, *, inplace: bool = False + ) -> Self | None: """ Set the name(s) of the axis. @@ -1266,6 +1342,7 @@ def _set_axis_name(self, name, axis: Axis = 0, inplace: bool = False): if not inplace: return renamed + return None # ---------------------------------------------------------------------- # Comparison Methods @@ -4542,12 +4619,10 @@ def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: mapper = {axis_name: f} - # error: Incompatible return value type (got "Optional[Self]", - # expected "Self") - # error: Argument 1 to "rename" of "NDFrame" has incompatible type - # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" # error: Keywords must be strings - return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] + # error: No overload variant of "_rename" of "NDFrame" matches + # argument type "dict[Literal['index', 'columns'], Callable[[Any], str]]" + return self._rename(**mapper) # type: ignore[call-overload, misc] @final def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self: @@ -4615,12 +4690,10 @@ def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self: axis_name = self._get_axis_name(axis) mapper = {axis_name: f} - # error: Incompatible return value type (got "Optional[Self]", - # expected "Self") - # error: Argument 1 to "rename" of "NDFrame" has incompatible type - # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" # error: Keywords must be strings - return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] + # error: No overload variant of "_rename" of "NDFrame" matches argument + # type "dict[Literal['index', 'columns'], Callable[[Any], str]]" + return self._rename(**mapper) # type: ignore[call-overload, misc] @overload def sort_values( @@ -9241,7 +9314,7 @@ def ranker(data): @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) def compare( self, - other, + other: Self, align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, @@ -9253,7 +9326,8 @@ def compare( f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" ) - mask = ~((self == other) | (self.isna() & other.isna())) + # error: Unsupported left operand type for & ("Self") + mask = ~((self == other) | (self.isna() & other.isna())) # type: ignore[operator] mask.fillna(True, inplace=True) if not keep_equal: @@ -9596,15 +9670,52 @@ def _align_series( return left, right, join_index + @overload + def _where( + self, + cond, + other=..., + *, + inplace: Literal[False] = ..., + axis: Axis | None = ..., + level=..., + ) -> Self: + ... + + @overload + def _where( + self, + cond, + other=..., + *, + inplace: Literal[True], + axis: Axis | None = ..., + level=..., + ) -> None: + ... + + @overload + def _where( + self, + cond, + other=..., + *, + inplace: bool, + axis: Axis | None = ..., + level=..., + ) -> Self | None: + ... + @final def _where( self, cond, other=lib.no_default, + *, inplace: bool = False, axis: Axis | None = None, level=None, - ): + ) -> Self | None: """ Equivalent to public method `where`, except that `other` is not applied as a function even if callable. Used in __setitem__. @@ -9950,7 +10061,7 @@ def where( ) other = common.apply_if_callable(other, self) - return self._where(cond, other, inplace, axis, level) + return self._where(cond, other, inplace=inplace, axis=axis, level=level) @overload def mask( @@ -11263,20 +11374,20 @@ def block_accum_func(blk_values): self, method=name ) - def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return self._accum_func( "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs ) - def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return self._accum_func( "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs ) - def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) - def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) @final @@ -11674,7 +11785,7 @@ def __ixor__(self, other) -> Self: # Misc methods @final - def _find_valid_index(self, *, how: str) -> Hashable | None: + def _find_valid_index(self, *, how: str) -> Hashable: """ Retrieves the index of the first valid value. @@ -11695,7 +11806,7 @@ def _find_valid_index(self, *, how: str) -> Hashable | None: @final @doc(position="first", klass=_shared_doc_kwargs["klass"]) - def first_valid_index(self) -> Hashable | None: + def first_valid_index(self) -> Hashable: """ Return index for {position} non-NA value or None, if no non-NA value is found. @@ -11771,7 +11882,7 @@ def first_valid_index(self) -> Hashable | None: @final @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) - def last_valid_index(self) -> Hashable | None: + def last_valid_index(self) -> Hashable: return self._find_valid_index(how="last") diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index f0eb7f44bf34e..36edf6116609b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,9 +1,5 @@ from __future__ import annotations -from collections.abc import ( - Hashable, - Sequence, -) from typing import ( TYPE_CHECKING, Callable, @@ -44,11 +40,14 @@ from pandas.core.series import Series if TYPE_CHECKING: + from collections.abc import Hashable + from pandas._typing import ( AggFuncType, AggFuncTypeBase, AggFuncTypeDict, IndexLabel, + SequenceNotStr, ) from pandas import DataFrame @@ -546,9 +545,10 @@ def pivot( if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name - values = cast(Sequence[Hashable], values) indexed = data._constructor( - data[values]._values, index=multiindex, columns=values + data[values]._values, + index=multiindex, + columns=cast("SequenceNotStr", values), ) else: indexed = data._constructor_sliced(data[values]._values, index=multiindex) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1672c29f15763..eb5b545092307 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -571,7 +571,7 @@ def _init_dict( # ---------------------------------------------------------------------- @property - def _constructor(self) -> Callable[..., Series]: + def _constructor(self) -> type[Series]: return Series def _constructor_from_mgr(self, mgr, axes): @@ -5135,8 +5135,28 @@ def info( show_counts=show_counts, ) + @overload + def _replace_single( + self, to_replace, method: str, inplace: Literal[False], limit + ) -> Self: + ... + + @overload + def _replace_single( + self, to_replace, method: str, inplace: Literal[True], limit + ) -> None: + ... + + @overload + def _replace_single( + self, to_replace, method: str, inplace: bool, limit + ) -> Self | None: + ... + # TODO(3.0): this can be removed once GH#33302 deprecation is enforced - def _replace_single(self, to_replace, method: str, inplace: bool, limit): + def _replace_single( + self, to_replace, method: str, inplace: bool, limit + ) -> Self | None: """ Replaces values in a Series using the fill method specified when no replacement value is given in the replace method @@ -5155,7 +5175,7 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit): fill_f(values, limit=limit, mask=mask) if inplace: - return + return None return result def memory_usage(self, index: bool = True, deep: bool = False) -> int: @@ -6396,17 +6416,17 @@ def kurt( product = prod @doc(make_doc("cummin", ndim=1)) - def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cummin(self, axis, skipna, *args, **kwargs) @doc(make_doc("cummax", ndim=1)) - def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cummax(self, axis, skipna, *args, **kwargs) @doc(make_doc("cumsum", ndim=1)) - def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) @doc(make_doc("cumprod", 1)) - def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): + def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)