From db6ab44a8919ab1eaae0b22bdc83b45d1bfd15d3 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 30 Aug 2022 18:48:20 +0100 Subject: [PATCH 1/2] ENH: Correct typing for read/to_html --- pandas-stubs/_typing.pyi | 1 + pandas-stubs/core/frame.pyi | 60 +++++++++++++++++++++++++++++-------- pandas-stubs/io/html.pyi | 50 +++++++++++++++---------------- tests/test_io.py | 9 +++++- 4 files changed, 81 insertions(+), 39 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 73617ff4f..24a9b52f0 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -236,5 +236,6 @@ CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] HDFCompLib = Literal["zlib", "lzo", "bzip2", "blosc"] ParquetEngine = Literal["auto", "pyarrow", "fastparquet"] +ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int] __all__ = ["npt", "type_t"] diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 4bf3d5b8e..7b1316762 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -49,6 +49,7 @@ from pandas._typing import ( Axes, Axis, AxisType, + ColspaceArgType, CompressionOptions, Dtype, DtypeNp, @@ -326,23 +327,39 @@ class DataFrame(NDFrame, OpsMixin): @overload def to_html( self, - buf: FilePathOrBuffer | None, - columns: Sequence[_str] | None = ..., - col_space: int | list[int] | dict[_str | int, int] | None = ..., + buf: FilePath | WriteBuffer[str], + columns: Sequence[HashableT] | None = ..., + col_space: ColspaceArgType | None = ..., header: _bool = ..., index: _bool = ..., na_rep: _str = ..., - formatters=..., - float_format=..., + formatters: list[Callable[[object], str]] + | tuple[Callable[[object], str], ...] + | Mapping[Hashable, Callable[[object], str]] + | None = ..., + float_format: Callable[[float], str] | None = ..., sparsify: _bool | None = ..., index_names: _bool = ..., - justify: _str | None = ..., + justify: Literal[ + "left", + "right", + "center", + "justify", + "justify-all", + "start", + "end", + "inherit", + "match-parent", + "initial", + "unset", + ] + | None = ..., max_rows: int | None = ..., max_cols: int | None = ..., show_dimensions: _bool = ..., decimal: _str = ..., bold_rows: _bool = ..., - classes: _str | list | tuple | None = ..., + classes: _str | Sequence[str] | None = ..., escape: _bool = ..., notebook: _bool = ..., border: int | None = ..., @@ -353,22 +370,39 @@ class DataFrame(NDFrame, OpsMixin): @overload def to_html( self, - columns: Sequence[_str] | None = ..., - col_space: int | list[int] | dict[_str | int, int] | None = ..., + buf: None = ..., + columns: Sequence[HashableT] | None = ..., + col_space: ColspaceArgType | None = ..., header: _bool = ..., index: _bool = ..., na_rep: _str = ..., - formatters=..., - float_format=..., + formatters: list[Callable[[object], str]] + | tuple[Callable[[object], str], ...] + | Mapping[Hashable, Callable[[object], str]] + | None = ..., + float_format: Callable[[float], str] | None = ..., sparsify: _bool | None = ..., index_names: _bool = ..., - justify: _str | None = ..., + justify: Literal[ + "left", + "right", + "center", + "justify", + "justify-all", + "start", + "end", + "inherit", + "match-parent", + "initial", + "unset", + ] + | None = ..., max_rows: int | None = ..., max_cols: int | None = ..., show_dimensions: _bool = ..., decimal: _str = ..., bold_rows: _bool = ..., - classes: _str | list | tuple | None = ..., + classes: _str | Sequence[str] | None = ..., escape: _bool = ..., notebook: _bool = ..., border: int | None = ..., diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi index b30c30129..360963b67 100644 --- a/pandas-stubs/io/html.pyi +++ b/pandas-stubs/io/html.pyi @@ -1,46 +1,46 @@ from typing import ( Any, Callable, - Iterable, + Hashable, + Literal, Mapping, + Pattern, Sequence, ) from pandas.core.frame import DataFrame -from pandas._typing import FilePathOrBuffer - -class _HtmlFrameParser: - io = ... - match = ... - attrs = ... - encoding = ... - displayed_only = ... - def __init__(self, io, match, attrs, encoding, displayed_only) -> None: ... - def parse_tables(self): ... - -class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): - def __init__(self, *args, **kwargs) -> None: ... - -class _LxmlFrameParser(_HtmlFrameParser): - def __init__(self, *args, **kwargs) -> None: ... +from pandas._typing import ( + FilePath, + HashableT, + ReadBuffer, +) def read_html( - io: FilePathOrBuffer, - match: str = ..., + io: FilePath | ReadBuffer[str], + match: str | Pattern = ..., flavor: str | None = ..., header: int | Sequence[int] | None = ..., - index_col: int | Sequence[Any] | None = ..., - skiprows: int | Sequence[Any] | slice | None = ..., + index_col: int | Sequence[int] | list[HashableT] | None = ..., + skiprows: int | Sequence[int] | slice | None = ..., attrs: Mapping[str, str] | None = ..., parse_dates: bool - | Sequence[int | str | Sequence[int | str]] - | dict[str, Sequence[int | str]] = ..., + | Sequence[int] + | list[HashableT] # Cannot be Sequence[Hashable] to prevent str + | Sequence[Sequence[int]] + | Sequence[Sequence[Hashable]] + | dict[str, Sequence[int]] + | dict[str, list[HashableT]] = ..., thousands: str = ..., encoding: str | None = ..., decimal: str = ..., - converters: Mapping[int | str, Callable] | None = ..., - na_values: Iterable[Any] | None = ..., + converters: Mapping[int | HashableT, Callable[[str], Any]] | None = ..., + na_values: str + | list[str] + | dict[HashableT, str] + | dict[HashableT, list[str]] + | None = ..., keep_default_na: bool = ..., displayed_only: bool = ..., + extract_links: Literal["header", "footer", "body", "all"] | None = ..., ) -> list[DataFrame]: ... diff --git a/tests/test_io.py b/tests/test_io.py index f87265710..33a767816 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,5 +1,4 @@ import io -import os import os.path import pathlib from pathlib import Path @@ -18,6 +17,7 @@ read_clipboard, read_feather, read_hdf, + read_html, read_json, read_orc, read_parquet, @@ -337,3 +337,10 @@ def test_feather(): check(assert_type(DF.to_feather(bio), None), type(None)) bio.seek(0) check(assert_type(read_feather(bio), DataFrame), DataFrame) + + +def test_read_html(): + check(assert_type(DF.to_html(), str), str) + with ensure_clean() as path: + check(assert_type(DF.to_html(path), None), type(None)) + check(assert_type(read_html(path), List[DataFrame]), list) From cf4fe7fe83bdaa640f7e5233155bbc1301b52441 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 31 Aug 2022 10:23:25 +0100 Subject: [PATCH 2/2] TYP: Remove redundant and improve types --- pandas-stubs/core/frame.pyi | 6 +++--- pandas-stubs/io/html.pyi | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 7b1316762..c7378eca5 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -328,7 +328,7 @@ class DataFrame(NDFrame, OpsMixin): def to_html( self, buf: FilePath | WriteBuffer[str], - columns: Sequence[HashableT] | None = ..., + columns: list[HashableT] | None = ..., col_space: ColspaceArgType | None = ..., header: _bool = ..., index: _bool = ..., @@ -359,7 +359,7 @@ class DataFrame(NDFrame, OpsMixin): show_dimensions: _bool = ..., decimal: _str = ..., bold_rows: _bool = ..., - classes: _str | Sequence[str] | None = ..., + classes: Sequence[str] | None = ..., escape: _bool = ..., notebook: _bool = ..., border: int | None = ..., @@ -402,7 +402,7 @@ class DataFrame(NDFrame, OpsMixin): show_dimensions: _bool = ..., decimal: _str = ..., bold_rows: _bool = ..., - classes: _str | Sequence[str] | None = ..., + classes: Sequence[str] | None = ..., escape: _bool = ..., notebook: _bool = ..., border: int | None = ..., diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi index 360963b67..227cba159 100644 --- a/pandas-stubs/io/html.pyi +++ b/pandas-stubs/io/html.pyi @@ -23,11 +23,10 @@ def read_html( header: int | Sequence[int] | None = ..., index_col: int | Sequence[int] | list[HashableT] | None = ..., skiprows: int | Sequence[int] | slice | None = ..., - attrs: Mapping[str, str] | None = ..., + attrs: dict[str, str] | None = ..., parse_dates: bool | Sequence[int] | list[HashableT] # Cannot be Sequence[Hashable] to prevent str - | Sequence[Sequence[int]] | Sequence[Sequence[Hashable]] | dict[str, Sequence[int]] | dict[str, list[HashableT]] = ...,