1
1
""" parquet compat """
2
2
3
+ from typing import Any , Dict , Optional
3
4
from warnings import catch_warnings
4
5
5
6
from pandas .compat ._optional import import_optional_dependency
10
11
from pandas .io .common import get_filepath_or_buffer , is_gcs_url , is_s3_url
11
12
12
13
13
- def get_engine (engine ) :
14
+ def get_engine (engine : str ) -> "BaseImpl" :
14
15
""" return our implementation """
15
16
16
17
if engine == "auto" :
@@ -35,19 +36,15 @@ def get_engine(engine):
35
36
"support"
36
37
)
37
38
38
- if engine not in ["pyarrow" , "fastparquet" ]:
39
- raise ValueError ("engine must be one of 'pyarrow', 'fastparquet'" )
40
-
41
39
if engine == "pyarrow" :
42
40
return PyArrowImpl ()
43
41
elif engine == "fastparquet" :
44
42
return FastParquetImpl ()
45
43
44
+ raise ValueError ("engine must be one of 'pyarrow', 'fastparquet'" )
46
45
47
- class BaseImpl :
48
-
49
- api = None # module
50
46
47
+ class BaseImpl :
51
48
@staticmethod
52
49
def validate_dataframe (df ):
53
50
@@ -74,7 +71,7 @@ def read(self, path, columns=None, **kwargs):
74
71
75
72
class PyArrowImpl (BaseImpl ):
76
73
def __init__ (self ):
77
- pyarrow = import_optional_dependency (
74
+ import_optional_dependency (
78
75
"pyarrow" , extra = "pyarrow is required for parquet support."
79
76
)
80
77
import pyarrow .parquet
@@ -87,13 +84,14 @@ def write(
87
84
path ,
88
85
compression = "snappy" ,
89
86
coerce_timestamps = "ms" ,
90
- index = None ,
87
+ index : Optional [ bool ] = None ,
91
88
partition_cols = None ,
92
89
** kwargs ,
93
90
):
94
91
self .validate_dataframe (df )
95
92
path , _ , _ , _ = get_filepath_or_buffer (path , mode = "wb" )
96
93
94
+ from_pandas_kwargs : Dict [str , Any ]
97
95
if index is None :
98
96
from_pandas_kwargs = {}
99
97
else :
@@ -203,7 +201,7 @@ def to_parquet(
203
201
path ,
204
202
engine = "auto" ,
205
203
compression = "snappy" ,
206
- index = None ,
204
+ index : Optional [ bool ] = None ,
207
205
partition_cols = None ,
208
206
** kwargs ,
209
207
):
0 commit comments