@@ -228,6 +228,7 @@ def read(
         self,
         path,
         columns=None,
+        filters=None,
         use_nullable_dtypes: bool = False,
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
@@ -257,7 +258,11 @@ def read(
         )
         try:
             pa_table = self.api.parquet.read_table(
-                path_or_handle, columns=columns, filesystem=filesystem, **kwargs
+                path_or_handle,
+                columns=columns,
+                filesystem=filesystem,
+                filters=filters,
+                **kwargs,
             )
             result = pa_table.to_pandas(**to_pandas_kwargs)
@@ -335,6 +340,7 @@ def read(
         self,
         path,
         columns=None,
+        filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
         **kwargs,
@@ -375,7 +381,7 @@ def read(

         try:
             parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
-            return parquet_file.to_pandas(columns=columns, **kwargs)
+            return parquet_file.to_pandas(columns=columns, filters=filters, **kwargs)
         finally:
             if handles is not None:
                 handles.close()
@@ -487,6 +493,7 @@ def read_parquet(
     use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
+    filters: list[tuple] | list[list[tuple]] | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -517,7 +524,6 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
-
     {storage_options}

         .. versionadded:: 1.3.0
@@ -550,6 +556,24 @@ def read_parquet(

         .. versionadded:: 2.1.0

+    filters : List[Tuple] or List[List[Tuple]], default None
+        To filter out data.
+        Filter syntax: [[(column, op, val), ...], ...],
+        where op is [==, =, >, >=, <, <=, !=, in, not in].
+        The innermost tuples are transposed into a set of filters applied
+        through an `AND` operation.
+        The outer list combines these sets of filters through an `OR`
+        operation.
+        A single list of tuples can also be used, meaning that no `OR`
+        operation between sets of filters is to be conducted.
+
+        Using this argument will NOT result in row-wise filtering of the final
+        partitions unless ``engine="pyarrow"`` is also specified. For
+        other engines, filtering is only performed at the partition level,
+        that is, to prevent the loading of some row groups and/or files.
+
+        .. versionadded:: 2.1.0
+
     **kwargs
         Any additional kwargs are passed to the engine.

@@ -632,6 +656,7 @@ def read_parquet(
     return impl.read(
         path,
         columns=columns,
+        filters=filters,
         storage_options=storage_options,
         use_nullable_dtypes=use_nullable_dtypes,
         dtype_backend=dtype_backend,
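The new `filters` keyword maps directly onto the predicate syntax documented in the docstring hunk above: each inner list of tuples is AND-ed, and the outer list OR-s those groups together. A minimal usage sketch of the patched API, assuming a hypothetical file ``sales.parquet`` with columns ``year`` and ``country`` (the file and column names are illustrative, not from this patch):

import pandas as pd

# Hypothetical data, for illustration only.
# Row-wise filtering requires engine="pyarrow"; other engines filter
# only at the row-group/file level, as the docstring notes.
df = pd.read_parquet(
    "sales.parquet",
    engine="pyarrow",
    filters=[
        [("year", ">=", 2020), ("country", "==", "DE")],  # AND within the group
        [("year", "==", 2019)],                           # OR with the group above
    ],
)

The equivalent single-group form, ``filters=[("year", ">=", 2020), ("country", "==", "DE")]``, skips the outer list when no `OR` combination is needed.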