Skip to content

Commit 0822888

Browse files
bashtage and Kevin Sheppard authored
ENH: Correct typing for read/to_html (#236)
* ENH: Correct typing for read/to_html
* TYP: Remove redundant and improve types

Co-authored-by: Kevin Sheppard <kevin.sheppard@gmail.com>
1 parent 1364a3c commit 0822888

File tree

4 files changed

+81
-40
lines changed

4 files changed

+81
-40
lines changed

pandas-stubs/_typing.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,5 +236,6 @@ CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
236236

237237
HDFCompLib = Literal["zlib", "lzo", "bzip2", "blosc"]
238238
ParquetEngine = Literal["auto", "pyarrow", "fastparquet"]
239+
ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int]
239240

240241
__all__ = ["npt", "type_t"]

pandas-stubs/core/frame.pyi

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ from pandas._typing import (
4949
Axes,
5050
Axis,
5151
AxisType,
52+
ColspaceArgType,
5253
CompressionOptions,
5354
Dtype,
5455
DtypeNp,
@@ -326,23 +327,39 @@ class DataFrame(NDFrame, OpsMixin):
326327
@overload
327328
def to_html(
328329
self,
329-
buf: FilePathOrBuffer | None,
330-
columns: Sequence[_str] | None = ...,
331-
col_space: int | list[int] | dict[_str | int, int] | None = ...,
330+
buf: FilePath | WriteBuffer[str],
331+
columns: list[HashableT] | None = ...,
332+
col_space: ColspaceArgType | None = ...,
332333
header: _bool = ...,
333334
index: _bool = ...,
334335
na_rep: _str = ...,
335-
formatters=...,
336-
float_format=...,
336+
formatters: list[Callable[[object], str]]
337+
| tuple[Callable[[object], str], ...]
338+
| Mapping[Hashable, Callable[[object], str]]
339+
| None = ...,
340+
float_format: Callable[[float], str] | None = ...,
337341
sparsify: _bool | None = ...,
338342
index_names: _bool = ...,
339-
justify: _str | None = ...,
343+
justify: Literal[
344+
"left",
345+
"right",
346+
"center",
347+
"justify",
348+
"justify-all",
349+
"start",
350+
"end",
351+
"inherit",
352+
"match-parent",
353+
"initial",
354+
"unset",
355+
]
356+
| None = ...,
340357
max_rows: int | None = ...,
341358
max_cols: int | None = ...,
342359
show_dimensions: _bool = ...,
343360
decimal: _str = ...,
344361
bold_rows: _bool = ...,
345-
classes: _str | list | tuple | None = ...,
362+
classes: Sequence[str] | None = ...,
346363
escape: _bool = ...,
347364
notebook: _bool = ...,
348365
border: int | None = ...,
@@ -353,22 +370,39 @@ class DataFrame(NDFrame, OpsMixin):
353370
@overload
354371
def to_html(
355372
self,
356-
columns: Sequence[_str] | None = ...,
357-
col_space: int | list[int] | dict[_str | int, int] | None = ...,
373+
buf: None = ...,
374+
columns: Sequence[HashableT] | None = ...,
375+
col_space: ColspaceArgType | None = ...,
358376
header: _bool = ...,
359377
index: _bool = ...,
360378
na_rep: _str = ...,
361-
formatters=...,
362-
float_format=...,
379+
formatters: list[Callable[[object], str]]
380+
| tuple[Callable[[object], str], ...]
381+
| Mapping[Hashable, Callable[[object], str]]
382+
| None = ...,
383+
float_format: Callable[[float], str] | None = ...,
363384
sparsify: _bool | None = ...,
364385
index_names: _bool = ...,
365-
justify: _str | None = ...,
386+
justify: Literal[
387+
"left",
388+
"right",
389+
"center",
390+
"justify",
391+
"justify-all",
392+
"start",
393+
"end",
394+
"inherit",
395+
"match-parent",
396+
"initial",
397+
"unset",
398+
]
399+
| None = ...,
366400
max_rows: int | None = ...,
367401
max_cols: int | None = ...,
368402
show_dimensions: _bool = ...,
369403
decimal: _str = ...,
370404
bold_rows: _bool = ...,
371-
classes: _str | list | tuple | None = ...,
405+
classes: Sequence[str] | None = ...,
372406
escape: _bool = ...,
373407
notebook: _bool = ...,
374408
border: int | None = ...,

pandas-stubs/io/html.pyi

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,45 @@
11
from typing import (
22
Any,
33
Callable,
4-
Iterable,
4+
Hashable,
5+
Literal,
56
Mapping,
7+
Pattern,
68
Sequence,
79
)
810

911
from pandas.core.frame import DataFrame
1012

11-
from pandas._typing import FilePathOrBuffer
12-
13-
class _HtmlFrameParser:
14-
io = ...
15-
match = ...
16-
attrs = ...
17-
encoding = ...
18-
displayed_only = ...
19-
def __init__(self, io, match, attrs, encoding, displayed_only) -> None: ...
20-
def parse_tables(self): ...
21-
22-
class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
23-
def __init__(self, *args, **kwargs) -> None: ...
24-
25-
class _LxmlFrameParser(_HtmlFrameParser):
26-
def __init__(self, *args, **kwargs) -> None: ...
13+
from pandas._typing import (
14+
FilePath,
15+
HashableT,
16+
ReadBuffer,
17+
)
2718

2819
def read_html(
29-
io: FilePathOrBuffer,
30-
match: str = ...,
20+
io: FilePath | ReadBuffer[str],
21+
match: str | Pattern = ...,
3122
flavor: str | None = ...,
3223
header: int | Sequence[int] | None = ...,
33-
index_col: int | Sequence[Any] | None = ...,
34-
skiprows: int | Sequence[Any] | slice | None = ...,
35-
attrs: Mapping[str, str] | None = ...,
24+
index_col: int | Sequence[int] | list[HashableT] | None = ...,
25+
skiprows: int | Sequence[int] | slice | None = ...,
26+
attrs: dict[str, str] | None = ...,
3627
parse_dates: bool
37-
| Sequence[int | str | Sequence[int | str]]
38-
| dict[str, Sequence[int | str]] = ...,
28+
| Sequence[int]
29+
| list[HashableT] # Cannot be Sequence[Hashable] to prevent str
30+
| Sequence[Sequence[Hashable]]
31+
| dict[str, Sequence[int]]
32+
| dict[str, list[HashableT]] = ...,
3933
thousands: str = ...,
4034
encoding: str | None = ...,
4135
decimal: str = ...,
42-
converters: Mapping[int | str, Callable] | None = ...,
43-
na_values: Iterable[Any] | None = ...,
36+
converters: Mapping[int | HashableT, Callable[[str], Any]] | None = ...,
37+
na_values: str
38+
| list[str]
39+
| dict[HashableT, str]
40+
| dict[HashableT, list[str]]
41+
| None = ...,
4442
keep_default_na: bool = ...,
4543
displayed_only: bool = ...,
44+
extract_links: Literal["header", "footer", "body", "all"] | None = ...,
4645
) -> list[DataFrame]: ...

tests/test_io.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import io
2-
import os
32
import os.path
43
import pathlib
54
from pathlib import Path
@@ -18,6 +17,7 @@
1817
read_clipboard,
1918
read_feather,
2019
read_hdf,
20+
read_html,
2121
read_json,
2222
read_orc,
2323
read_parquet,
@@ -337,3 +337,10 @@ def test_feather():
337337
check(assert_type(DF.to_feather(bio), None), type(None))
338338
bio.seek(0)
339339
check(assert_type(read_feather(bio), DataFrame), DataFrame)
340+
341+
342+
def test_read_html():
343+
check(assert_type(DF.to_html(), str), str)
344+
with ensure_clean() as path:
345+
check(assert_type(DF.to_html(path), None), type(None))
346+
check(assert_type(read_html(path), List[DataFrame]), list)

0 commit comments

Comments
 (0)