Skip to content

TYP: misc return types #57430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into the base branch on
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
Concatenate: Any = None

HashableT = TypeVar("HashableT", bound=Hashable)
HashableT2 = TypeVar("HashableT2", bound=Hashable)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious why we need a second one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needed for _from_nested_dict. When using the same TypeVar in a function signature, the type checkers bind these to the same type:

def x(v: HashableT) -> HashableT: ...

x(None) # return None
x((1, 2, 3)) # return tuple[int, int, int]

When we have a function that tries to bind two different sets of types, we need a new TypeVar

def f(x: HashableT, y: HashableT2) -> dict[HashableT, dict[HashableT2, Any]]: ...

(pandas-stubs uses the name HashableT2)

MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)

# array-like
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
Literal,
cast,
Expand Down Expand Up @@ -105,7 +104,7 @@ class PandasObject(DirNamesMixin):
_cache: dict[str, Any]

@property
def _constructor(self) -> Callable[..., Self]:
def _constructor(self) -> type[Self]:
"""
Class constructor (for this class it's just `__class__`).
"""
Expand Down Expand Up @@ -1356,7 +1355,7 @@ def searchsorted(
sorter=sorter,
)

def drop_duplicates(self, *, keep: DropKeep = "first"):
def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
duplicated = self._duplicated(keep=keep)
# error: Value of type "IndexOpsMixin" is not indexable
return self[~duplicated] # type: ignore[index]
Expand Down
122 changes: 105 additions & 17 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@
FormattersType,
Frequency,
FromDictOrient,
HashableT,
HashableT2,
IgnoreRaise,
IndexKeyFunc,
IndexLabel,
Expand All @@ -239,6 +241,7 @@
SortKind,
StorageOptions,
Suffixes,
T,
ToStataByteorder,
ToTimestampHow,
UpdateJoin,
Expand Down Expand Up @@ -643,10 +646,10 @@ class DataFrame(NDFrame, OpsMixin):
__pandas_priority__ = 4000

@property
def _constructor(self) -> Callable[..., DataFrame]:
def _constructor(self) -> type[DataFrame]:
return DataFrame

def _constructor_from_mgr(self, mgr, axes):
def _constructor_from_mgr(self, mgr, axes) -> DataFrame:
if self._constructor is DataFrame:
# we are pandas.DataFrame (or a subclass that doesn't override _constructor)
return DataFrame._from_mgr(mgr, axes=axes)
Expand All @@ -659,7 +662,7 @@ def _constructor_from_mgr(self, mgr, axes):
def _sliced_from_mgr(self, mgr, axes) -> Series:
return Series._from_mgr(mgr, axes)

def _constructor_sliced_from_mgr(self, mgr, axes):
def _constructor_sliced_from_mgr(self, mgr, axes) -> Series:
if self._constructor_sliced is Series:
ser = self._sliced_from_mgr(mgr, axes)
ser._name = None # caller is responsible for setting real name
Expand Down Expand Up @@ -1353,7 +1356,7 @@ def _get_values_for_csv(
decimal: str,
na_rep: str,
quoting, # int csv.QUOTE_FOO from stdlib
) -> Self:
) -> DataFrame:
# helper used by to_csv
mgr = self._mgr.get_values_for_csv(
float_format=float_format,
Expand Down Expand Up @@ -1831,7 +1834,7 @@ def from_dict(
a b 1 3
c 2 4
"""
index = None
index: list | Index | None = None
orient = orient.lower() # type: ignore[assignment]
if orient == "index":
if len(data) > 0:
Expand All @@ -1857,7 +1860,7 @@ def from_dict(
else:
realdata = data["data"]

def create_index(indexlist, namelist):
def create_index(indexlist, namelist) -> Index:
index: Index
if len(namelist) > 1:
index = MultiIndex.from_tuples(indexlist, names=namelist)
Expand Down Expand Up @@ -2700,6 +2703,42 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:

to_feather(self, path, **kwargs)

@overload
def to_markdown(
self,
buf: None = ...,
*,
mode: str = ...,
index: bool = ...,
storage_options: StorageOptions | None = ...,
**kwargs,
) -> str:
...

@overload
def to_markdown(
self,
buf: FilePath | WriteBuffer[str],
*,
mode: str = ...,
index: bool = ...,
storage_options: StorageOptions | None = ...,
**kwargs,
) -> None:
...

@overload
def to_markdown(
self,
buf: FilePath | WriteBuffer[str] | None,
*,
mode: str = ...,
index: bool = ...,
storage_options: StorageOptions | None = ...,
**kwargs,
) -> str | None:
...

@doc(
Series.to_markdown,
klass=_shared_doc_kwargs["klass"],
Expand Down Expand Up @@ -2881,6 +2920,39 @@ def to_parquet(
**kwargs,
)

@overload
def to_orc(
self,
path: None = ...,
*,
engine: Literal["pyarrow"] = ...,
index: bool | None = ...,
engine_kwargs: dict[str, Any] | None = ...,
) -> bytes:
...

@overload
def to_orc(
self,
path: FilePath | WriteBuffer[bytes],
*,
engine: Literal["pyarrow"] = ...,
index: bool | None = ...,
engine_kwargs: dict[str, Any] | None = ...,
) -> None:
...

@overload
def to_orc(
self,
path: FilePath | WriteBuffer[bytes] | None,
*,
engine: Literal["pyarrow"] = ...,
index: bool | None = ...,
engine_kwargs: dict[str, Any] | None = ...,
) -> bytes | None:
...

def to_orc(
self,
path: FilePath | WriteBuffer[bytes] | None = None,
Expand Down Expand Up @@ -4027,7 +4099,7 @@ def _setitem_slice(self, key: slice, value) -> None:
# backwards-compat, xref GH#31469
self.iloc[key] = value

def _setitem_array(self, key, value):
def _setitem_array(self, key, value) -> None:
# also raises Exception if object array with NA values
if com.is_bool_indexer(key):
# bool indexer is indexing along rows
Expand Down Expand Up @@ -4061,7 +4133,7 @@ def _setitem_array(self, key, value):
elif np.ndim(value) > 1:
# list of lists
value = DataFrame(value).values
return self._setitem_array(key, value)
self._setitem_array(key, value)

else:
self._iset_not_inplace(key, value)
Expand Down Expand Up @@ -4595,7 +4667,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:

return _eval(expr, inplace=inplace, **kwargs)

def select_dtypes(self, include=None, exclude=None) -> Self:
def select_dtypes(self, include=None, exclude=None) -> DataFrame:
"""
Return a subset of the DataFrame's columns based on the column dtypes.

Expand Down Expand Up @@ -5474,9 +5546,21 @@ def pop(self, item: Hashable) -> Series:
"""
return super().pop(item=item)

@overload
def _replace_columnwise(
self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[True], regex
) -> None:
...

@overload
def _replace_columnwise(
self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[False], regex
) -> Self:
...

def _replace_columnwise(
self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex
):
) -> Self | None:
"""
Dispatch to Series.replace column-wise.

Expand Down Expand Up @@ -5505,7 +5589,7 @@ def _replace_columnwise(
res._iset_item(i, newobj, inplace=inplace)

if inplace:
return
return None
return res.__finalize__(self)

@doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
Expand Down Expand Up @@ -11815,19 +11899,19 @@ def kurt(
product = prod

@doc(make_doc("cummin", ndim=2))
def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None is not documented and 0 seems to behave the same?!

return NDFrame.cummin(self, axis, skipna, *args, **kwargs)

@doc(make_doc("cummax", ndim=2))
def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
return NDFrame.cummax(self, axis, skipna, *args, **kwargs)

@doc(make_doc("cumsum", ndim=2))
def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)

@doc(make_doc("cumprod", 2))
def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)

def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
Expand Down Expand Up @@ -12710,8 +12794,12 @@ def values(self) -> np.ndarray:
return self._mgr.as_array()


def _from_nested_dict(data) -> collections.defaultdict:
new_data: collections.defaultdict = collections.defaultdict(dict)
def _from_nested_dict(
data: Mapping[HashableT, Mapping[HashableT2, T]],
) -> collections.defaultdict[HashableT2, dict[HashableT, T]]:
new_data: collections.defaultdict[
HashableT2, dict[HashableT, T]
] = collections.defaultdict(dict)
for index, s in data.items():
for col, v in s.items():
new_data[col][index] = v
Expand Down
Loading