Skip to content

TYP: fix a few types #54976

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@

from pandas.core.dtypes.dtypes import SparseDtype

from pandas import util
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are other methods in this module that are not meant to be public though: capitalize_first_letter, cache_readonly, Substitution, Appender

Copy link
Member Author

@twoertwein twoertwein Sep 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I think we could do __all__ = [all_classes_functions_in_the_docs] (in utils/__init__) to account for that. And/or we could deprecate it from utils and put it as part of a different namespace.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__all__ only changes behavior of from foo import *, no? What do we think of moving these functions (can we make a list of them?) to the top namespace?

Does this hold up other aspects of this PR? If not, I would suggest moving to a separate PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to remove this change from this PR. I will open an issue to first discuss how this should be handled.

__all__ affects from pandas.utils import * but (more importantly) type checkers use it (among other rules) to determine which symbols are meant to be public: if __all__ is present, only those symbols listed in it are considered public.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if __all__ is present, only those symbols listed in it are considered public.

This doesn't seem to be the case with mypy:

# test/__init__.py
x = 5
y = 6

__all__ = ['y']

# bar.py
import test

print(test.x)
print(test.y)

mypy bar.py reports no issues for me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must have misremembered that (pyright also allows that).

Pyright describes how it handles it: https://github.com/microsoft/pyright/blob/main/docs/typed-libraries.md#library-interface

from pandas.tseries.api import infer_freq
from pandas.tseries import offsets

Expand Down Expand Up @@ -348,6 +349,7 @@
"to_timedelta",
"tseries",
"unique",
"util",
"value_counts",
"wide_to_long",
]
24 changes: 16 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,11 +1926,17 @@ def to_dict(
self,
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
into: type[dict] = ...,
index: bool = ...,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Present in the implementation but missing in the overloads

) -> dict:
...

@overload
def to_dict(self, orient: Literal["records"], into: type[dict] = ...) -> list[dict]:
def to_dict(
self,
orient: Literal["records"],
into: type[dict] = ...,
index: bool = ...,
) -> list[dict]:
...

@deprecate_nonkeyword_arguments(
Expand Down Expand Up @@ -11297,7 +11303,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
def any( # type: ignore[override]
self,
*,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand All @@ -11312,7 +11318,7 @@ def any( # type: ignore[override]
@doc(make_doc("all", ndim=2))
def all(
self,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool = False,
skipna: bool = True,
**kwargs,
Expand Down Expand Up @@ -11711,6 +11717,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Present in the implementation but missing in the overloads

) -> Series:
...

Expand All @@ -11721,6 +11728,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand All @@ -11731,6 +11739,7 @@ def quantile(
axis: Axis = ...,
numeric_only: bool = ...,
interpolation: QuantileInterpolation = ...,
method: Literal["single", "table"] = ...,
) -> Series | DataFrame:
...

Expand Down Expand Up @@ -11830,11 +11839,10 @@ def quantile(

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
# error: List item 0 has incompatible type "Union[float, Union[Union[
# ExtensionArray, ndarray[Any, Any]], Index, Series], Sequence[float]]";
# expected "float"
res_df = self.quantile( # type: ignore[call-overload]
[q],
# error: List item 0 has incompatible type "float | ExtensionArray |
# ndarray[Any, Any] | Index | Series | Sequence[float]"; expected "float"
res_df = self.quantile(
[q], # type: ignore[list-item]
axis=axis,
numeric_only=numeric_only,
interpolation=interpolation,
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11743,7 +11743,7 @@ def _logical_func(
self,
name: str,
func,
axis: Axis = 0,
axis: Axis | None = 0,
bool_only: bool_t = False,
skipna: bool_t = True,
**kwargs,
Expand All @@ -11756,7 +11756,10 @@ def _logical_func(
res = self._logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
# error: Item "bool" of "Series | bool" has no attribute "_logical_func"
return res._logical_func( # type: ignore[union-attr]
name, func, skipna=skipna, **kwargs
)
elif axis is None:
axis = 0

Expand Down
31 changes: 12 additions & 19 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import abc
from collections.abc import (
Hashable,
Iterable,
Expand Down Expand Up @@ -549,7 +548,7 @@ def read_excel(
_WorkbookT = TypeVar("_WorkbookT")


class BaseExcelReader(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class BaseExcelReader(Generic[_WorkbookT]):
book: _WorkbookT

def __init__(
Expand Down Expand Up @@ -589,13 +588,11 @@ def __init__(
)

@property
@abc.abstractmethod
def _workbook_class(self) -> type[_WorkbookT]:
pass
raise NotImplementedError

@abc.abstractmethod
def load_workbook(self, filepath_or_buffer, engine_kwargs) -> _WorkbookT:
pass
raise NotImplementedError

def close(self) -> None:
if hasattr(self, "book"):
Expand All @@ -611,21 +608,17 @@ def close(self) -> None:
self.handles.close()

@property
@abc.abstractmethod
def sheet_names(self) -> list[str]:
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_name(self, name: str):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_by_index(self, index: int):
pass
raise NotImplementedError

@abc.abstractmethod
def get_sheet_data(self, sheet, rows: int | None = None):
pass
raise NotImplementedError

def raise_if_bad_sheet_by_index(self, index: int) -> None:
n_sheets = len(self.sheet_names)
Expand Down Expand Up @@ -940,7 +933,7 @@ def parse(


@doc(storage_options=_shared_docs["storage_options"])
class ExcelWriter(Generic[_WorkbookT], metaclass=abc.ABCMeta):
class ExcelWriter(Generic[_WorkbookT]):
"""
Class for writing DataFrame objects into excel sheets.

Expand Down Expand Up @@ -1178,20 +1171,19 @@ def engine(self) -> str:
return self._engine

@property
@abc.abstractmethod
def sheets(self) -> dict[str, Any]:
"""Mapping of sheet names to sheet objects."""
raise NotImplementedError

@property
@abc.abstractmethod
def book(self) -> _WorkbookT:
"""
Book instance. Class type will depend on the engine used.

This attribute can be used to access engine-specific features.
"""
raise NotImplementedError

@abc.abstractmethod
def _write_cells(
self,
cells,
Expand All @@ -1214,12 +1206,13 @@ def _write_cells(
freeze_panes: int tuple of length 2
contains the bottom-most row and right-most column to freeze
"""
raise NotImplementedError

@abc.abstractmethod
def _save(self) -> None:
"""
Save workbook to disk.
"""
raise NotImplementedError

def __init__(
self,
Expand Down
4 changes: 1 addition & 3 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,9 +941,7 @@ def write(
if isinstance(writer, ExcelWriter):
need_save = False
else:
# error: Cannot instantiate abstract class 'ExcelWriter' with abstract
# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
writer = ExcelWriter( # type: ignore[abstract]
writer = ExcelWriter(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this immediately raise in the try below?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, no, because we overwrite __new__. I think this is okay.

writer,
engine=engine,
storage_options=storage_options,
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
ReadBuffer,
StorageOptions,
WriteBuffer,
Self,
)

from pandas.core.generic import NDFrame
Expand Down Expand Up @@ -1056,7 +1057,7 @@ def close(self) -> None:
if self.handles is not None:
self.handles.close()

def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]:
def __iter__(self) -> Self:
return self

@overload
Expand Down Expand Up @@ -1099,7 +1100,7 @@ def __next__(self) -> DataFrame | Series:
else:
return obj

def __enter__(self) -> JsonReader[FrameSeriesStrT]:
def __enter__(self) -> Self:
return self

def __exit__(
Expand Down
50 changes: 50 additions & 0 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,13 +1307,60 @@ def read_table(
return _read(filepath_or_buffer, kwds)


@overload
def read_fwf(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
*,
colspecs: Sequence[tuple[int, int]] | str | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | lib.NoDefault = ...,
iterator: Literal[True],
chunksize: int | None = ...,
**kwds,
) -> TextFileReader:
...


@overload
def read_fwf(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
*,
colspecs: Sequence[tuple[int, int]] | str | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | lib.NoDefault = ...,
iterator: bool = ...,
chunksize: int,
**kwds,
) -> TextFileReader:
...


@overload
def read_fwf(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
*,
colspecs: Sequence[tuple[int, int]] | str | None = ...,
widths: Sequence[int] | None = ...,
infer_nrows: int = ...,
dtype_backend: DtypeBackend | lib.NoDefault = ...,
iterator: Literal[False] = ...,
chunksize: None = ...,
**kwds,
) -> DataFrame:
...


def read_fwf(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
*,
colspecs: Sequence[tuple[int, int]] | str | None = "infer",
widths: Sequence[int] | None = None,
infer_nrows: int = 100,
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
iterator: bool = False,
chunksize: int | None = None,
**kwds,
) -> DataFrame | TextFileReader:
r"""
Expand Down Expand Up @@ -1374,6 +1421,9 @@ def read_fwf(
--------
>>> pd.read_fwf('data.csv') # doctest: +SKIP
"""
kwds["iterator"] = iterator
kwds["chunksize"] = chunksize
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Would it be better to do this down on L1462 with the other assignments?


# Check input arguments.
if colspecs is None and widths is None:
raise ValueError("Must specify either colspecs or widths")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ class TestPDApi(Base):
"plotting",
"io",
"tseries",
"util",
]
private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"]
private_lib = ["compat", "core", "pandas", "_built_with_meson"]

# misc
misc = ["IndexSlice", "NaT", "NA"]
Expand Down