From db6ab44a8919ab1eaae0b22bdc83b45d1bfd15d3 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Tue, 30 Aug 2022 18:48:20 +0100
Subject: [PATCH 1/2] ENH: Correct typing for read_html/to_html

---
 pandas-stubs/_typing.pyi    |  1 +
 pandas-stubs/core/frame.pyi | 60 +++++++++++++++++++++++++++++--------
 pandas-stubs/io/html.pyi    | 50 +++++++++++++++----------------
 tests/test_io.py            |  9 +++++-
 4 files changed, 81 insertions(+), 39 deletions(-)

diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi
index 73617ff4f..24a9b52f0 100644
--- a/pandas-stubs/_typing.pyi
+++ b/pandas-stubs/_typing.pyi
@@ -236,5 +236,6 @@ CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
 
 HDFCompLib = Literal["zlib", "lzo", "bzip2", "blosc"]
 ParquetEngine = Literal["auto", "pyarrow", "fastparquet"]
+ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int]
 
 __all__ = ["npt", "type_t"]
diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
index 4bf3d5b8e..7b1316762 100644
--- a/pandas-stubs/core/frame.pyi
+++ b/pandas-stubs/core/frame.pyi
@@ -49,6 +49,7 @@ from pandas._typing import (
     Axes,
     Axis,
     AxisType,
+    ColspaceArgType,
     CompressionOptions,
     Dtype,
     DtypeNp,
@@ -326,23 +327,39 @@ class DataFrame(NDFrame, OpsMixin):
     @overload
     def to_html(
         self,
-        buf: FilePathOrBuffer | None,
-        columns: Sequence[_str] | None = ...,
-        col_space: int | list[int] | dict[_str | int, int] | None = ...,
+        buf: FilePath | WriteBuffer[str],
+        columns: Sequence[HashableT] | None = ...,
+        col_space: ColspaceArgType | None = ...,
         header: _bool = ...,
         index: _bool = ...,
         na_rep: _str = ...,
-        formatters=...,
-        float_format=...,
+        formatters: list[Callable[[object], str]]
+        | tuple[Callable[[object], str], ...]
+        | Mapping[Hashable, Callable[[object], str]]
+        | None = ...,
+        float_format: Callable[[float], str] | None = ...,
         sparsify: _bool | None = ...,
         index_names: _bool = ...,
-        justify: _str | None = ...,
+        justify: Literal[
+            "left",
+            "right",
+            "center",
+            "justify",
+            "justify-all",
+            "start",
+            "end",
+            "inherit",
+            "match-parent",
+            "initial",
+            "unset",
+        ]
+        | None = ...,
         max_rows: int | None = ...,
         max_cols: int | None = ...,
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         bold_rows: _bool = ...,
-        classes: _str | list | tuple | None = ...,
+        classes: _str | Sequence[str] | None = ...,
         escape: _bool = ...,
         notebook: _bool = ...,
         border: int | None = ...,
@@ -353,22 +370,39 @@ class DataFrame(NDFrame, OpsMixin):
     @overload
     def to_html(
         self,
-        columns: Sequence[_str] | None = ...,
-        col_space: int | list[int] | dict[_str | int, int] | None = ...,
+        buf: None = ...,
+        columns: Sequence[HashableT] | None = ...,
+        col_space: ColspaceArgType | None = ...,
         header: _bool = ...,
         index: _bool = ...,
         na_rep: _str = ...,
-        formatters=...,
-        float_format=...,
+        formatters: list[Callable[[object], str]]
+        | tuple[Callable[[object], str], ...]
+        | Mapping[Hashable, Callable[[object], str]]
+        | None = ...,
+        float_format: Callable[[float], str] | None = ...,
         sparsify: _bool | None = ...,
         index_names: _bool = ...,
-        justify: _str | None = ...,
+        justify: Literal[
+            "left",
+            "right",
+            "center",
+            "justify",
+            "justify-all",
+            "start",
+            "end",
+            "inherit",
+            "match-parent",
+            "initial",
+            "unset",
+        ]
+        | None = ...,
         max_rows: int | None = ...,
         max_cols: int | None = ...,
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         bold_rows: _bool = ...,
-        classes: _str | list | tuple | None = ...,
+        classes: _str | Sequence[str] | None = ...,
         escape: _bool = ...,
         notebook: _bool = ...,
         border: int | None = ...,
diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
index b30c30129..360963b67 100644
--- a/pandas-stubs/io/html.pyi
+++ b/pandas-stubs/io/html.pyi
@@ -1,46 +1,46 @@
 from typing import (
     Any,
     Callable,
-    Iterable,
+    Hashable,
+    Literal,
     Mapping,
+    Pattern,
     Sequence,
 )
 
 from pandas.core.frame import DataFrame
 
-from pandas._typing import FilePathOrBuffer
-
-class _HtmlFrameParser:
-    io = ...
-    match = ...
-    attrs = ...
-    encoding = ...
-    displayed_only = ...
-    def __init__(self, io, match, attrs, encoding, displayed_only) -> None: ...
-    def parse_tables(self): ...
-
-class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
-    def __init__(self, *args, **kwargs) -> None: ...
-
-class _LxmlFrameParser(_HtmlFrameParser):
-    def __init__(self, *args, **kwargs) -> None: ...
+from pandas._typing import (
+    FilePath,
+    HashableT,
+    ReadBuffer,
+)
 
 def read_html(
-    io: FilePathOrBuffer,
-    match: str = ...,
+    io: FilePath | ReadBuffer[str],
+    match: str | Pattern = ...,
     flavor: str | None = ...,
     header: int | Sequence[int] | None = ...,
-    index_col: int | Sequence[Any] | None = ...,
-    skiprows: int | Sequence[Any] | slice | None = ...,
+    index_col: int | Sequence[int] | list[HashableT] | None = ...,
+    skiprows: int | Sequence[int] | slice | None = ...,
     attrs: Mapping[str, str] | None = ...,
     parse_dates: bool
-    | Sequence[int | str | Sequence[int | str]]
-    | dict[str, Sequence[int | str]] = ...,
+    | Sequence[int]
+    | list[HashableT]  # Cannot be Sequence[Hashable] to prevent str
+    | Sequence[Sequence[int]]
+    | Sequence[Sequence[Hashable]]
+    | dict[str, Sequence[int]]
+    | dict[str, list[HashableT]] = ...,
     thousands: str = ...,
     encoding: str | None = ...,
     decimal: str = ...,
-    converters: Mapping[int | str, Callable] | None = ...,
-    na_values: Iterable[Any] | None = ...,
+    converters: Mapping[int | HashableT, Callable[[str], Any]] | None = ...,
+    na_values: str
+    | list[str]
+    | dict[HashableT, str]
+    | dict[HashableT, list[str]]
+    | None = ...,
     keep_default_na: bool = ...,
     displayed_only: bool = ...,
+    extract_links: Literal["header", "footer", "body", "all"] | None = ...,
 ) -> list[DataFrame]: ...
diff --git a/tests/test_io.py b/tests/test_io.py
index f87265710..33a767816 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,5 +1,4 @@
 import io
-import os
 import os.path
 import pathlib
 from pathlib import Path
@@ -18,6 +17,7 @@
     read_clipboard,
     read_feather,
     read_hdf,
+    read_html,
     read_json,
     read_orc,
     read_parquet,
@@ -337,3 +337,10 @@ def test_feather():
     check(assert_type(DF.to_feather(bio), None), type(None))
     bio.seek(0)
     check(assert_type(read_feather(bio), DataFrame), DataFrame)
+
+
+def test_read_html():  # typing checks for a to_html -> read_html round trip
+    check(assert_type(DF.to_html(), str), str)  # no buf: HTML is returned as a str
+    with ensure_clean() as path:
+        check(assert_type(DF.to_html(path), None), type(None))  # buf given: None
+        check(assert_type(read_html(path), List[DataFrame]), list)  # reads it back

From cf4fe7fe83bdaa640f7e5233155bbc1301b52441 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Wed, 31 Aug 2022 10:23:25 +0100
Subject: [PATCH 2/2] TYP: Remove redundant types and improve others

---
 pandas-stubs/core/frame.pyi | 6 +++---
 pandas-stubs/io/html.pyi    | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi
index 7b1316762..c7378eca5 100644
--- a/pandas-stubs/core/frame.pyi
+++ b/pandas-stubs/core/frame.pyi
@@ -328,7 +328,7 @@ class DataFrame(NDFrame, OpsMixin):
     def to_html(
         self,
         buf: FilePath | WriteBuffer[str],
-        columns: Sequence[HashableT] | None = ...,
+        columns: list[HashableT] | None = ...,
         col_space: ColspaceArgType | None = ...,
         header: _bool = ...,
         index: _bool = ...,
@@ -359,7 +359,7 @@ class DataFrame(NDFrame, OpsMixin):
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         bold_rows: _bool = ...,
-        classes: _str | Sequence[str] | None = ...,
+        classes: Sequence[str] | None = ...,
         escape: _bool = ...,
         notebook: _bool = ...,
         border: int | None = ...,
@@ -402,7 +402,7 @@ class DataFrame(NDFrame, OpsMixin):
         show_dimensions: _bool = ...,
         decimal: _str = ...,
         bold_rows: _bool = ...,
-        classes: _str | Sequence[str] | None = ...,
+        classes: Sequence[str] | None = ...,
         escape: _bool = ...,
         notebook: _bool = ...,
         border: int | None = ...,
diff --git a/pandas-stubs/io/html.pyi b/pandas-stubs/io/html.pyi
index 360963b67..227cba159 100644
--- a/pandas-stubs/io/html.pyi
+++ b/pandas-stubs/io/html.pyi
@@ -23,11 +23,10 @@ def read_html(
     header: int | Sequence[int] | None = ...,
     index_col: int | Sequence[int] | list[HashableT] | None = ...,
     skiprows: int | Sequence[int] | slice | None = ...,
-    attrs: Mapping[str, str] | None = ...,
+    attrs: dict[str, str] | None = ...,
     parse_dates: bool
     | Sequence[int]
     | list[HashableT]  # Cannot be Sequence[Hashable] to prevent str
-    | Sequence[Sequence[int]]
     | Sequence[Sequence[Hashable]]
     | dict[str, Sequence[int]]
     | dict[str, list[HashableT]] = ...,