Skip to content

Commit 1fa3747

Browse files
authored
CLN/DOC: DataFrame.to_parquet supports file-like objects (#35235)
1 parent 4da8622 commit 1fa3747

File tree

2 files changed

+30
-20
lines changed

2 files changed

+30
-20
lines changed

pandas/core/frame.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
IO,
2020
TYPE_CHECKING,
2121
Any,
22+
AnyStr,
2223
Dict,
2324
FrozenSet,
2425
Hashable,
@@ -2266,11 +2267,11 @@ def to_markdown(
22662267
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
22672268
def to_parquet(
22682269
self,
2269-
path,
2270-
engine="auto",
2271-
compression="snappy",
2272-
index=None,
2273-
partition_cols=None,
2270+
path: FilePathOrBuffer[AnyStr],
2271+
engine: str = "auto",
2272+
compression: Optional[str] = "snappy",
2273+
index: Optional[bool] = None,
2274+
partition_cols: Optional[List[str]] = None,
22742275
**kwargs,
22752276
) -> None:
22762277
"""
@@ -2283,9 +2284,12 @@ def to_parquet(
22832284
22842285
Parameters
22852286
----------
2286-
path : str
2287-
File path or Root Directory path. Will be used as Root Directory
2288-
path while writing a partitioned dataset.
2287+
path : str or file-like object
2288+
If a string, it will be used as Root Directory path
2289+
when writing a partitioned dataset. By file-like object,
2290+
we refer to objects with a write() method, such as a file handle
2291+
(e.g. via builtin open function) or io.BytesIO. The engine
2292+
fastparquet does not accept file-like objects.
22892293
22902294
.. versionchanged:: 1.0.0
22912295
@@ -2312,6 +2316,7 @@ def to_parquet(
23122316
partition_cols : list, optional, default None
23132317
Column names by which to partition the dataset.
23142318
Columns are partitioned in the order they are given.
2319+
Must be None if path is not a string.
23152320
23162321
.. versionadded:: 0.24.0
23172322

pandas/io/parquet.py

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
""" parquet compat """
22

3-
from typing import Any, Dict, Optional
3+
from typing import Any, AnyStr, Dict, List, Optional
44
from warnings import catch_warnings
55

6+
from pandas._typing import FilePathOrBuffer
67
from pandas.compat._optional import import_optional_dependency
78
from pandas.errors import AbstractMethodError
89

@@ -85,10 +86,10 @@ def __init__(self):
8586
def write(
8687
self,
8788
df: DataFrame,
88-
path,
89-
compression="snappy",
89+
path: FilePathOrBuffer[AnyStr],
90+
compression: Optional[str] = "snappy",
9091
index: Optional[bool] = None,
91-
partition_cols=None,
92+
partition_cols: Optional[List[str]] = None,
9293
**kwargs,
9394
):
9495
self.validate_dataframe(df)
@@ -213,11 +214,11 @@ def read(self, path, columns=None, **kwargs):
213214

214215
def to_parquet(
215216
df: DataFrame,
216-
path,
217+
path: FilePathOrBuffer[AnyStr],
217218
engine: str = "auto",
218-
compression="snappy",
219+
compression: Optional[str] = "snappy",
219220
index: Optional[bool] = None,
220-
partition_cols=None,
221+
partition_cols: Optional[List[str]] = None,
221222
**kwargs,
222223
):
223224
"""
@@ -226,9 +227,12 @@ def to_parquet(
226227
Parameters
227228
----------
228229
df : DataFrame
229-
path : str
230-
File path or Root Directory path. Will be used as Root Directory path
231-
while writing a partitioned dataset.
230+
path : str or file-like object
231+
If a string, it will be used as Root Directory path
232+
when writing a partitioned dataset. By file-like object,
233+
we refer to objects with a write() method, such as a file handle
234+
(e.g. via builtin open function) or io.BytesIO. The engine
235+
fastparquet does not accept file-like objects.
232236
233237
.. versionchanged:: 0.24.0
234238
@@ -251,8 +255,9 @@ def to_parquet(
251255
.. versionadded:: 0.24.0
252256
253257
partition_cols : str or list, optional, default None
254-
Column names by which to partition the dataset
255-
Columns are partitioned in the order they are given
258+
Column names by which to partition the dataset.
259+
Columns are partitioned in the order they are given.
260+
Must be None if path is not a string.
256261
257262
.. versionadded:: 0.24.0
258263

0 commit comments

Comments
 (0)