Skip to content

Commit 15d2775

Browse files
bashtage, Kevin Sheppard, Dr-Irv
authored
Io excel (#239)
* ENH: Improve typing for read_excel
* ENH: Improve typing for Excel tools
* TST: Add tests for to/read_excel
  Add tests
  Correct typing
* BUG: Close file
* CLN: Simplify type
* MAINT: Refactor to move excel tests
* MAINT: Set experimental
* STY: Apply black
* Use experimental
* MAINT: Rollback CI changes

Co-authored-by: Kevin Sheppard <kevin.sheppard@gmail.com>
Co-authored-by: Irv Lustig <irv@princeton.com>
1 parent 4890dba commit 15d2775

File tree

6 files changed

+317
-166
lines changed

6 files changed

+317
-166
lines changed

pandas-stubs/_typing.pyi

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ PythonScalar = Union[str, bool, complex]
4747
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", Period, Timestamp, Timedelta)
4848
PandasScalar = Union[bytes, datetime.date, datetime.datetime, datetime.timedelta]
4949
# Scalar = Union[PythonScalar, PandasScalar]
50-
IntStrT = TypeVar("IntStrT", int, str)
5150

5251
DatetimeLike = Union[datetime.date, datetime.datetime, np.datetime64, Timestamp]
5352

@@ -67,6 +66,9 @@ class BaseBuffer(Protocol): ...
6766
class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]): ...
6867
class WriteBuffer(BaseBuffer, Protocol[AnyStr_cov]): ...
6968

69+
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
70+
def truncate(self, size: Union[int, None] = ...) -> int: ...
71+
7072
FilePath = Union[str, PathLike[str]]
7173

7274
Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]

pandas-stubs/io/common.pyi

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from typing import (
2+
IO,
3+
AnyStr,
4+
Generic,
5+
)
6+
7+
from pandas._typing import CompressionDict
8+
9+
class IOHandles(Generic[AnyStr]):
10+
handle: IO[AnyStr]
11+
compression: CompressionDict
12+
created_handles: list[IO[AnyStr]]
13+
is_wrapped: bool
14+
def close(self) -> None: ...
15+
def __enter__(self) -> IOHandles[AnyStr]: ...
16+
def __exit__(self, *args: object) -> None: ...
17+
def __init__(self, handle, compression, created_handles, is_wrapped) -> None: ...

pandas-stubs/io/excel/_base.pyi

Lines changed: 175 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -1,188 +1,239 @@
1-
import abc
1+
from types import TracebackType
22
from typing import (
33
Any,
44
Callable,
5+
Generator,
6+
Hashable,
7+
Iterable,
8+
Literal,
59
Sequence,
610
overload,
711
)
812

13+
from odf.opendocument import OpenDocument
14+
from openpyxl.workbook.workbook import Workbook
915
from pandas.core.frame import DataFrame
16+
import pyxlsb.workbook
17+
from xlrd.book import Book
1018

1119
from pandas._typing import (
1220
Dtype,
1321
FilePath,
1422
ReadBuffer,
15-
Scalar,
23+
StorageOptions,
24+
WriteExcelBuffer,
1625
)
1726

27+
from pandas.io.common import IOHandles
28+
1829
@overload
1930
def read_excel(
20-
filepath: FilePath | ReadBuffer[bytes] | bytes,
31+
io: FilePath
32+
| ReadBuffer[bytes]
33+
| bytes
34+
| ExcelFile
35+
| Workbook
36+
| Book
37+
| OpenDocument
38+
| pyxlsb.workbook.Workbook,
2139
sheet_name: list[int | str] | None,
40+
*,
2241
header: int | Sequence[int] | None = ...,
2342
names: list[str] | None = ...,
2443
index_col: int | Sequence[int] | None = ...,
25-
usecols: int | str | Sequence[int | str | Callable] | None = ...,
26-
squeeze: bool = ...,
27-
dtype: str | dict[str, Any] | Dtype = ...,
28-
engine: str | None = ...,
29-
converters: dict[int | str, Callable] | None = ...,
30-
true_values: Sequence[Scalar] | None = ...,
31-
false_values: Sequence[Scalar] | None = ...,
32-
skiprows: Sequence[int] | int | Callable | None = ...,
44+
usecols: Sequence[int] | Sequence[str] | Callable[[str], bool] | None = ...,
45+
dtype: str | Dtype | dict[str, str | Dtype] | None = ...,
46+
engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
47+
converters: dict[int | str, Callable[[object], object]] | None = ...,
48+
true_values: Iterable[Hashable] | None = ...,
49+
false_values: Iterable[Hashable] | None = ...,
50+
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
3351
nrows: int | None = ...,
34-
na_values=...,
52+
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
3553
keep_default_na: bool = ...,
54+
na_filter: bool = ...,
3655
verbose: bool = ...,
37-
parse_dates: bool | Sequence | dict[str, Sequence] = ...,
56+
parse_dates: bool
57+
| Sequence[int]
58+
| Sequence[Sequence[str] | Sequence[int]]
59+
| dict[str, Sequence[int] | list[str]] = ...,
3860
date_parser: Callable | None = ...,
3961
thousands: str | None = ...,
62+
decimal: str = ...,
4063
comment: str | None = ...,
4164
skipfooter: int = ...,
42-
convert_float: bool = ...,
43-
mangle_dupe_cols: bool = ...,
65+
storage_options: StorageOptions = ...,
4466
) -> dict[int | str, DataFrame]: ...
4567
@overload
4668
def read_excel(
47-
filepath: FilePath | ReadBuffer[bytes] | bytes,
69+
io: FilePath
70+
| ReadBuffer[bytes]
71+
| bytes
72+
| ExcelFile
73+
| Workbook
74+
| Book
75+
| OpenDocument
76+
| pyxlsb.workbook.Workbook,
4877
sheet_name: int | str = ...,
78+
*,
4979
header: int | Sequence[int] | None = ...,
5080
names: list[str] | None = ...,
5181
index_col: int | Sequence[int] | None = ...,
52-
usecols: int | str | Sequence[int | str | Callable] | None = ...,
53-
squeeze: bool = ...,
54-
dtype: str | dict[str, Any] | Dtype = ...,
55-
engine: str | None = ...,
56-
converters: dict[int | str, Callable] | None = ...,
57-
true_values: Sequence[Scalar] | None = ...,
58-
false_values: Sequence[Scalar] | None = ...,
59-
skiprows: Sequence[int] | int | Callable | None = ...,
82+
usecols: Sequence[int] | Sequence[str] | Callable[[str], bool] | None = ...,
83+
dtype: str | Dtype | dict[str, str | Dtype] | None = ...,
84+
engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
85+
converters: dict[int | str, Callable[[object], object]] | None = ...,
86+
true_values: Iterable[Hashable] | None = ...,
87+
false_values: Iterable[Hashable] | None = ...,
88+
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
6089
nrows: int | None = ...,
61-
na_values=...,
90+
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
6291
keep_default_na: bool = ...,
92+
na_filter: bool = ...,
6393
verbose: bool = ...,
64-
parse_dates: bool | Sequence | dict[str, Sequence] = ...,
94+
parse_dates: bool
95+
| Sequence[int]
96+
| Sequence[Sequence[str] | Sequence[int]]
97+
| dict[str, Sequence[int] | list[str]] = ...,
6598
date_parser: Callable | None = ...,
6699
thousands: str | None = ...,
100+
decimal: str = ...,
67101
comment: str | None = ...,
68102
skipfooter: int = ...,
69-
convert_float: bool = ...,
70-
mangle_dupe_cols: bool = ...,
71-
**kwargs,
103+
storage_options: StorageOptions = ...,
72104
) -> DataFrame: ...
73105

74-
class BaseExcelReader(metaclass=abc.ABCMeta):
75-
book = ...
76-
def __init__(self, filepath_or_buffer) -> None: ...
77-
@abc.abstractmethod
78-
def load_workbook(self, filepath_or_buffer): ...
79-
def close(self) -> None: ...
80-
@property
81-
@abc.abstractmethod
82-
def sheet_names(self): ...
83-
@abc.abstractmethod
84-
def get_sheet_by_name(self, name): ...
85-
@abc.abstractmethod
86-
def get_sheet_by_index(self, index): ...
87-
@abc.abstractmethod
88-
def get_sheet_data(self, sheet, convert_float): ...
89-
def parse(
106+
class ExcelWriter:
107+
def __init__(
90108
self,
91-
sheet_name: int = ...,
92-
header: int = ...,
93-
names=...,
94-
index_col=...,
95-
usecols=...,
96-
squeeze: bool = ...,
97-
dtype=...,
98-
true_values=...,
99-
false_values=...,
100-
skiprows=...,
101-
nrows=...,
102-
na_values=...,
103-
verbose: bool = ...,
104-
parse_dates: bool = ...,
105-
date_parser=...,
106-
thousands=...,
107-
comment=...,
108-
skipfooter: int = ...,
109-
convert_float: bool = ...,
110-
mangle_dupe_cols: bool = ...,
111-
**kwds,
112-
): ...
113-
114-
class ExcelWriter(metaclass=abc.ABCMeta):
115-
def __new__(cls, path, engine=..., **kwargs): ...
116-
book = ...
117-
curr_sheet = ...
118-
path = ...
109+
path: FilePath | WriteExcelBuffer | ExcelWriter,
110+
engine: Literal["auto", "openpyxl", "pyxlsb", "odf"] | None = ...,
111+
date_format: str | None = ...,
112+
datetime_format: str | None = ...,
113+
mode: Literal["w", "a"] = ...,
114+
storage_options: StorageOptions = ...,
115+
if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = ...,
116+
engine_kwargs: dict[str, Any] | None = ...,
117+
) -> None: ...
118+
@property
119+
def supported_extensions(self) -> tuple[str, ...]: ...
119120
@property
120-
def supported_extensions(self): ...
121+
def engine(self) -> Literal["openpyxl", "pyxlsb", "odf"]: ...
121122
@property
122-
def engine(self): ...
123+
def sheets(self) -> dict[str, Any]: ...
124+
@property
125+
def book(self) -> Workbook | OpenDocument | pyxlsb.workbook.Workbook: ...
123126
def write_cells(
124127
self,
125-
cells,
126-
sheet_name=...,
128+
cells: Generator[object, None, None],
129+
sheet_name: str | None = ...,
127130
startrow: int = ...,
128131
startcol: int = ...,
129-
freeze_panes=...,
130-
): ...
131-
def save(self): ...
132-
sheets = ...
133-
cur_sheet = ...
134-
date_format: str = ...
135-
datetime_format: str = ...
136-
mode = ...
137-
def __init__(
132+
freeze_panes: tuple[int, int] | None = ...,
133+
) -> None: ...
134+
def save(self) -> None: ...
135+
@property
136+
def date_format(self) -> str: ...
137+
@property
138+
def datetime_format(self) -> str: ...
139+
@property
140+
def if_sheet_exists(self) -> Literal["error", "new", "replace", "overlay"]: ...
141+
@property
142+
def cur_sheet(self) -> Any: ...
143+
@property
144+
def handles(self) -> IOHandles[bytes]: ...
145+
@property
146+
def path(self) -> str | None: ...
147+
def __fspath__(self) -> str: ...
148+
def __enter__(self) -> ExcelWriter: ...
149+
def __exit__(
138150
self,
139-
path,
140-
engine=...,
141-
date_format=...,
142-
datetime_format=...,
143-
mode: str = ...,
144-
**engine_kwargs,
151+
exc_type: type[BaseException] | None,
152+
exc_value: BaseException | None,
153+
traceback: TracebackType | None,
145154
) -> None: ...
146-
def __fspath__(self): ...
147-
@classmethod
148-
def check_extension(cls, ext): ...
149-
def __enter__(self): ...
150-
def __exit__(self, exc_type, exc_value, traceback) -> None: ...
151-
def close(self): ...
155+
def close(self) -> None: ...
152156

153157
class ExcelFile:
154158
engine = ...
155-
io = ...
156-
def __init__(self, io, engine=...) -> None: ...
159+
io: FilePath | ReadBuffer[bytes] | bytes = ...
160+
def __init__(
161+
self,
162+
io: FilePath | ReadBuffer[bytes] | bytes,
163+
engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
164+
storage_options: StorageOptions = ...,
165+
) -> None: ...
157166
def __fspath__(self): ...
167+
@overload
158168
def parse(
159169
self,
160-
sheet_name: int = ...,
161-
header: int = ...,
162-
names=...,
163-
index_col=...,
164-
usecols=...,
165-
squeeze: bool = ...,
166-
converters=...,
167-
true_values=...,
168-
false_values=...,
169-
skiprows=...,
170-
nrows=...,
171-
na_values=...,
172-
parse_dates: bool = ...,
173-
date_parser=...,
174-
thousands=...,
175-
comment=...,
170+
sheet_name: list[int | str] | None,
171+
header: int | Sequence[int] | None = ...,
172+
names: list[str] | None = ...,
173+
index_col: int | Sequence[int] | None = ...,
174+
usecols: str
175+
| Sequence[int]
176+
| Sequence[str]
177+
| Callable[[str], bool]
178+
| None = ...,
179+
converters: dict[int | str, Callable[[object], object]] | None = ...,
180+
true_values: Iterable[Hashable] | None = ...,
181+
false_values: Iterable[Hashable] | None = ...,
182+
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
183+
nrows: int | None = ...,
184+
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
185+
parse_dates: bool
186+
| Sequence[int]
187+
| Sequence[Sequence[str] | Sequence[int]]
188+
| dict[str, Sequence[int] | list[str]] = ...,
189+
date_parser: Callable | None = ...,
190+
thousands: str | None = ...,
191+
comment: str | None = ...,
176192
skipfooter: int = ...,
177-
convert_float: bool = ...,
178-
mangle_dupe_cols: bool = ...,
179-
**kwds,
180-
): ...
193+
keep_default_na: bool = ...,
194+
na_filter: bool = ...,
195+
**kwds: Any,
196+
) -> dict[int | str, DataFrame]: ...
197+
@overload
198+
def parse(
199+
self,
200+
sheet_name: int | str,
201+
header: int | Sequence[int] | None = ...,
202+
names: list[str] | None = ...,
203+
index_col: int | Sequence[int] | None = ...,
204+
usecols: str
205+
| Sequence[int]
206+
| Sequence[str]
207+
| Callable[[str], bool]
208+
| None = ...,
209+
converters: dict[int | str, Callable[[object], object]] | None = ...,
210+
true_values: Iterable[Hashable] | None = ...,
211+
false_values: Iterable[Hashable] | None = ...,
212+
skiprows: int | Sequence[int] | Callable[[object], bool] | None = ...,
213+
nrows: int | None = ...,
214+
na_values: Sequence[str] | dict[str | int, Sequence[str]] = ...,
215+
parse_dates: bool
216+
| Sequence[int]
217+
| Sequence[Sequence[str] | Sequence[int]]
218+
| dict[str, Sequence[int] | list[str]] = ...,
219+
date_parser: Callable | None = ...,
220+
thousands: str | None = ...,
221+
comment: str | None = ...,
222+
skipfooter: int = ...,
223+
keep_default_na: bool = ...,
224+
na_filter: bool = ...,
225+
**kwds: Any,
226+
) -> DataFrame: ...
181227
@property
182-
def book(self): ...
228+
def book(self) -> Workbook | Book | OpenDocument | pyxlsb.workbook.Workbook: ...
183229
@property
184-
def sheet_names(self): ...
230+
def sheet_names(self) -> list[int | str]: ...
185231
def close(self) -> None: ...
186-
def __enter__(self): ...
187-
def __exit__(self, exc_type, exc_value, traceback) -> None: ...
232+
def __enter__(self) -> ExcelFile: ...
233+
def __exit__(
234+
self,
235+
exc_type: type[BaseException] | None,
236+
exc_value: BaseException | None,
237+
traceback: TracebackType | None,
238+
) -> None: ...
188239
def __del__(self) -> None: ...

0 commit comments

Comments
 (0)