@@ -212,28 +212,37 @@ def check_error_on_write(self, df, engine, exc):
212
212
with tm .ensure_clean () as path :
213
213
to_parquet (df , path , engine , compression = None )
214
214
215
- def check_round_trip (self , df , engine , expected = None ,
216
- write_kwargs = None , read_kwargs = None ,
217
- check_names = True ):
215
def do_round_trip(self, df, path, engine_impl, expected=None,
                  write_kwargs=None, read_kwargs=None,
                  check_names=True):
    """Write *df* to *path* with the given engine, read it back, and
    assert the result equals *expected* (defaults to *df* itself).

    Writes uncompressed by default so both parquet engines behave
    consistently across test environments.
    """
    effective_write = {'compression': None} if write_kwargs is None else write_kwargs
    effective_read = {} if read_kwargs is None else read_kwargs

    df.to_parquet(path, engine_impl, **effective_write)
    actual = read_parquet(path, engine_impl, **effective_read)

    baseline = df if expected is None else expected
    tm.assert_frame_equal(baseline, actual, check_names=check_names)
233
def check_round_trip(self, df, engine, expected=None,
                     write_kwargs=None, read_kwargs=None,
                     check_names=True):
    """Round-trip *df* through a temporary parquet file and compare.

    The round trip runs twice on the same path; the second pass
    exercises overwriting an already-existing file.
    """
    with tm.ensure_clean() as path:
        for _ in range(2):
            self.do_round_trip(df, path, engine, expected,
                               write_kwargs=write_kwargs,
                               read_kwargs=read_kwargs,
                               check_names=check_names)
237
246
238
247
239
248
class TestBasic (Base ):
@@ -251,7 +260,7 @@ def test_columns_dtypes(self, engine):
251
260
252
261
# unicode
253
262
df .columns = [u'foo' , u'bar' ]
254
- self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
263
+ self .check_round_trip (df , engine )
255
264
256
265
def test_columns_dtypes_invalid (self , engine ):
257
266
@@ -292,7 +301,6 @@ def test_read_columns(self, engine):
292
301
293
302
expected = pd .DataFrame ({'string' : list ('abc' )})
294
303
self .check_round_trip (df , engine , expected = expected ,
295
- write_kwargs = {'compression' : None },
296
304
read_kwargs = {'columns' : ['string' ]})
297
305
298
306
def test_write_index (self , engine ):
@@ -304,7 +312,7 @@ def test_write_index(self, engine):
304
312
pytest .skip ("pyarrow is < 0.7.0" )
305
313
306
314
df = pd .DataFrame ({'A' : [1 , 2 , 3 ]})
307
- self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
315
+ self .check_round_trip (df , engine )
308
316
309
317
indexes = [
310
318
[2 , 3 , 4 ],
@@ -315,15 +323,12 @@ def test_write_index(self, engine):
315
323
# non-default index
316
324
for index in indexes :
317
325
df .index = index
318
- self .check_round_trip (
319
- df , engine ,
320
- write_kwargs = {'compression' : None },
321
- check_names = check_names )
326
+ self .check_round_trip (df , engine , check_names = check_names )
322
327
323
328
# index with meta-data
324
329
df .index = [0 , 1 , 2 ]
325
330
df .index .name = 'foo'
326
- self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
331
+ self .check_round_trip (df , engine )
327
332
328
333
def test_write_multiindex (self , pa_ge_070 ):
329
334
# Not suppoprted in fastparquet as of 0.1.3 or older pyarrow version
@@ -332,7 +337,7 @@ def test_write_multiindex(self, pa_ge_070):
332
337
df = pd .DataFrame ({'A' : [1 , 2 , 3 ]})
333
338
index = pd .MultiIndex .from_tuples ([('a' , 1 ), ('a' , 2 ), ('b' , 1 )])
334
339
df .index = index
335
- self .check_round_trip (df , engine , write_kwargs = { 'compression' : None } )
340
+ self .check_round_trip (df , engine )
336
341
337
342
def test_write_column_multiindex (self , engine ):
338
343
# column multi-index
@@ -428,13 +433,7 @@ def test_categorical_unsupported(self, pa_lt_070):
428
433
429
434
def test_s3_roundtrip(self, df_compat, s3_resource, pa):
    # GH #19134 — pyarrow can read and write s3:// URLs directly.
    s3_path = 's3://pandas-test/test.parquet'
    self.do_round_trip(df_compat, s3_path, pa)
438
437
439
438
440
439
class TestParquetFastParquet (Base ):
@@ -446,7 +445,7 @@ def test_basic(self, fp, df_full):
446
445
# additional supported types for fastparquet
447
446
df ['timedelta' ] = pd .timedelta_range ('1 day' , periods = 3 )
448
447
449
- self .check_round_trip (df , fp , write_kwargs = { 'compression' : None } )
448
+ self .check_round_trip (df , fp )
450
449
451
450
@pytest .mark .skip (reason = "not supported" )
452
451
def test_duplicate_columns (self , fp ):
@@ -459,8 +458,7 @@ def test_duplicate_columns(self, fp):
459
458
def test_bool_with_none(self, fp):
    # Optional booleans come back as float16: True -> 1.0, None -> NaN,
    # False -> 0.0.
    df = pd.DataFrame({'a': [True, None, False]})
    coerced = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16')
    self.check_round_trip(df, fp, expected=coerced)
464
462
465
463
def test_unsupported (self , fp ):
466
464
@@ -476,7 +474,7 @@ def test_categorical(self, fp):
476
474
if LooseVersion (fastparquet .__version__ ) < LooseVersion ("0.1.3" ):
477
475
pytest .skip ("CategoricalDtype not supported for older fp" )
478
476
df = pd .DataFrame ({'a' : pd .Categorical (list ('abc' ))})
479
- self .check_round_trip (df , fp , write_kwargs = { 'compression' : None } )
477
+ self .check_round_trip (df , fp )
480
478
481
479
def test_datetime_tz (self , fp ):
482
480
# doesn't preserve tz
@@ -485,8 +483,7 @@ def test_datetime_tz(self, fp):
485
483
486
484
# warns on the coercion
487
485
with catch_warnings (record = True ):
488
- self .check_round_trip (df , fp , df .astype ('datetime64[ns]' ),
489
- write_kwargs = {'compression' : None })
486
+ self .check_round_trip (df , fp , df .astype ('datetime64[ns]' ))
490
487
491
488
def test_filter_row_groups (self , fp ):
492
489
d = {'a' : list (range (0 , 3 ))}
@@ -497,3 +494,10 @@ def test_filter_row_groups(self, fp):
497
494
result = read_parquet (path , fp , filters = [('a' , '==' , 0 )])
498
495
assert len (result ) == 1
499
496
497
def test_s3_roundtrip(self, df_compat, s3_resource, fp):
    # GH #19134
    # Removed leftover debug print(s3_resource, fp) from the test body.
    # The fastparquet engine does not accept an s3:// URL here, so the
    # round trip is expected to raise TypeError.
    with pytest.raises(TypeError):
        self.do_round_trip(df_compat, 's3://pandas-test/test.parquet', fp)
503
+
0 commit comments