1
1
""" parquet compat """
2
2
3
- from typing import Any , Dict , Optional
3
+ from typing import Any , AnyStr , Dict , List , Optional
4
4
from warnings import catch_warnings
5
5
6
+ from pandas ._typing import FilePathOrBuffer
6
7
from pandas .compat ._optional import import_optional_dependency
7
8
from pandas .errors import AbstractMethodError
8
9
@@ -85,10 +86,10 @@ def __init__(self):
85
86
def write (
86
87
self ,
87
88
df : DataFrame ,
88
- path ,
89
- compression = "snappy" ,
89
+ path : FilePathOrBuffer [ AnyStr ] ,
90
+ compression : Optional [ str ] = "snappy" ,
90
91
index : Optional [bool ] = None ,
91
- partition_cols = None ,
92
+ partition_cols : Optional [ List [ str ]] = None ,
92
93
** kwargs ,
93
94
):
94
95
self .validate_dataframe (df )
@@ -213,11 +214,11 @@ def read(self, path, columns=None, **kwargs):
213
214
214
215
def to_parquet (
215
216
df : DataFrame ,
216
- path ,
217
+ path : FilePathOrBuffer [ AnyStr ] ,
217
218
engine : str = "auto" ,
218
- compression = "snappy" ,
219
+ compression : Optional [ str ] = "snappy" ,
219
220
index : Optional [bool ] = None ,
220
- partition_cols = None ,
221
+ partition_cols : Optional [ List [ str ]] = None ,
221
222
** kwargs ,
222
223
):
223
224
"""
@@ -226,9 +227,12 @@ def to_parquet(
226
227
Parameters
227
228
----------
228
229
df : DataFrame
229
- path : str
230
- File path or Root Directory path. Will be used as Root Directory path
231
- while writing a partitioned dataset.
230
+ path : str or file-like object
231
+ If a string, it will be used as the root directory path
232
+ when writing a partitioned dataset. By file-like object,
233
+ we refer to objects with a write() method, such as a file handle
234
+ (e.g. via builtin open function) or io.BytesIO. The engine
235
+ fastparquet does not accept file-like objects.
232
236
233
237
.. versionchanged:: 0.24.0
234
238
@@ -251,8 +255,9 @@ def to_parquet(
251
255
.. versionadded:: 0.24.0
252
256
253
257
partition_cols : str or list, optional, default None
254
- Column names by which to partition the dataset
255
- Columns are partitioned in the order they are given
258
+ Column names by which to partition the dataset.
259
+ Columns are partitioned in the order they are given.
260
+ Must be None if path is not a string.
256
261
257
262
.. versionadded:: 0.24.0
258
263
0 commit comments