@@ -236,12 +236,17 @@ def test_basic(self):
236
236
expected = DataFrame ({'a' : [1 , 0 , 0 ],
237
237
'b' : [0 , 1 , 0 ],
238
238
'c' : [0 , 0 , 1 ]}, dtype = self .dtype )
239
- assert_frame_equal (get_dummies (s_list , sparse = self .sparse , dtype = self .dtype ), expected )
240
- assert_frame_equal (get_dummies (s_series , sparse = self .sparse , dtype = self .dtype ), expected )
239
+ result = get_dummies (s_list , sparse = self .sparse , dtype = self .dtype )
240
+ assert_frame_equal (result , expected )
241
+
242
+ result = get_dummies (s_series , sparse = self .sparse , dtype = self .dtype )
243
+ assert_frame_equal (result , expected )
241
244
242
245
expected .index = list ('ABC' )
243
- assert_frame_equal (
244
- get_dummies (s_series_index , sparse = self .sparse , dtype = self .dtype ), expected )
246
+ result = get_dummies (s_series_index ,
247
+ sparse = self .sparse ,
248
+ dtype = self .dtype )
249
+ assert_frame_equal (result , expected )
245
250
246
251
def test_basic_types (self ):
247
252
# GH 10531
@@ -268,13 +273,22 @@ def test_basic_types(self):
268
273
result = get_dummies (s_series , sparse = self .sparse , dtype = self .dtype )
269
274
compare (result , expected )
270
275
271
- result = get_dummies (s_df , sparse = self .sparse , columns = s_df .columns , dtype = self .dtype )
276
+ result = get_dummies (s_df ,
277
+ sparse = self .sparse ,
278
+ columns = s_df .columns ,
279
+ dtype = self .dtype )
272
280
tm .assert_series_equal (result .get_dtype_counts (),
273
281
Series ({self .dtype_str : 8 }))
274
282
275
- result = get_dummies (s_df , sparse = self .sparse , columns = ['a' ], dtype = self .dtype )
276
- expected = Series ({self .dtype_str : 3 , 'int64' : 1 , 'object' : 1 }).sort_values ()
277
- tm .assert_series_equal (result .get_dtype_counts ().sort_values (), expected )
283
+ result = get_dummies (s_df ,
284
+ sparse = self .sparse ,
285
+ columns = ['a' ],
286
+ dtype = self .dtype )
287
+ expected = Series ({self .dtype_str : 3 ,
288
+ 'int64' : 1 ,
289
+ 'object' : 1 }).sort_values ()
290
+ tm .assert_series_equal (result .get_dtype_counts ().sort_values (),
291
+ expected )
278
292
279
293
def test_just_na (self ):
280
294
just_na_list = [np .nan ]
@@ -302,7 +316,10 @@ def test_include_na(self):
302
316
assert_frame_equal (res , exp )
303
317
304
318
# Sparse dataframes do not allow nan labelled columns, see #GH8822
305
- res_na = get_dummies (s , dummy_na = True , sparse = self .sparse , dtype = self .dtype )
319
+ res_na = get_dummies (s ,
320
+ dummy_na = True ,
321
+ sparse = self .sparse ,
322
+ dtype = self .dtype )
306
323
exp_na = DataFrame ({nan : {0 : 0 , 1 : 0 , 2 : 1 },
307
324
'a' : {0 : 1 , 1 : 0 , 2 : 0 },
308
325
'b' : {0 : 0 , 1 : 1 , 2 : 0 }},
@@ -312,7 +329,10 @@ def test_include_na(self):
312
329
exp_na .columns = res_na .columns
313
330
assert_frame_equal (res_na , exp_na )
314
331
315
- res_just_na = get_dummies ([nan ], dummy_na = True , sparse = self .sparse , dtype = self .dtype )
332
+ res_just_na = get_dummies ([nan ],
333
+ dummy_na = True ,
334
+ sparse = self .sparse ,
335
+ dtype = self .dtype )
316
336
exp_just_na = DataFrame (Series (1 , index = [0 ]), columns = [nan ],
317
337
dtype = self .dtype )
318
338
tm .assert_numpy_array_equal (res_just_na .values , exp_just_na .values )
@@ -323,7 +343,10 @@ def test_unicode(self
323
343
e = 'e'
324
344
eacute = unicodedata .lookup ('LATIN SMALL LETTER E WITH ACUTE' )
325
345
s = [e , eacute , eacute ]
326
- res = get_dummies (s , prefix = 'letter' , sparse = self .sparse , dtype = self .dtype )
346
+ res = get_dummies (s ,
347
+ prefix = 'letter' ,
348
+ sparse = self .sparse ,
349
+ dtype = self .dtype )
327
350
exp = DataFrame ({'letter_e' : {0 : 1 ,
328
351
1 : 0 ,
329
352
2 : 0 },
@@ -360,21 +383,26 @@ def test_dataframe_dummies_prefix_list(self):
360
383
df = DataFrame ({'A' : ['a' , 'b' , 'a' ],
361
384
'B' : ['b' , 'b' , 'c' ],
362
385
'C' : [1 , 2 , 3 ]}, dtype = self .dtype )
363
- result = get_dummies (df , prefix = prefixes , sparse = self .sparse , dtype = self .dtype )
386
+ result = get_dummies (df ,
387
+ prefix = prefixes ,
388
+ sparse = self .sparse ,
389
+ dtype = self .dtype )
364
390
expected = DataFrame ({'C' : [1 , 2 , 3 ],
365
391
'from_A_a' : [1 , 0 , 1 ],
366
392
'from_A_b' : [0 , 1 , 0 ],
367
393
'from_B_b' : [1 , 1 , 0 ],
368
394
'from_B_c' : [0 , 0 , 1 ]}, dtype = self .dtype )
369
395
cols = expected .columns [1 :]
370
396
expected [cols ] = expected [cols ].astype (self .dtype )
371
- expected = expected [['C' , 'from_A_a' , 'from_A_b' , 'from_B_b' , 'from_B_c' ]]
397
+ expected = expected [['C' , 'from_A_a' , 'from_A_b' ,
398
+ 'from_B_b' , 'from_B_c' ]]
372
399
assert_frame_equal (result , expected )
373
400
374
401
def test_dataframe_dummies_prefix_str (self ):
375
402
# not that you should do this...
376
403
df = self .df
377
- result = get_dummies (df , prefix = 'bad' , sparse = self .sparse , dtype = self .dtype )
404
+ result = get_dummies (df , prefix = 'bad' , sparse = self .sparse ,
405
+ dtype = self .dtype )
378
406
expected = DataFrame ([[1 , 1 , 0 , 1 , 0 ],
379
407
[2 , 0 , 1 , 1 , 0 ],
380
408
[3 , 1 , 0 , 0 , 1 ]],
@@ -397,7 +425,8 @@ def test_dataframe_dummies_subset(self):
397
425
398
426
def test_dataframe_dummies_prefix_sep (self ):
399
427
df = self .df
400
- result = get_dummies (df , prefix_sep = '..' , sparse = self .sparse , dtype = self .dtype )
428
+ result = get_dummies (df , prefix_sep = '..' , sparse = self .sparse ,
429
+ dtype = self .dtype )
401
430
expected = DataFrame ({'C' : [1 , 2 , 3 ],
402
431
'A..a' : [1 , 0 , 1 ],
403
432
'A..b' : [0 , 1 , 0 ],
@@ -408,12 +437,17 @@ def test_dataframe_dummies_prefix_sep(self):
408
437
expected [cols ] = expected [cols ].astype (self .dtype )
409
438
assert_frame_equal (result , expected )
410
439
411
- result = get_dummies (df , prefix_sep = ['..' , '__' ], sparse = self .sparse , dtype = self .dtype )
440
+ result = get_dummies (df ,
441
+ prefix_sep = ['..' , '__' ],
442
+ sparse = self .sparse ,
443
+ dtype = self .dtype )
412
444
expected = expected .rename (columns = {'B..b' : 'B__b' , 'B..c' : 'B__c' })
413
445
assert_frame_equal (result , expected )
414
446
415
447
result = get_dummies (df , prefix_sep = {'A' : '..' ,
416
- 'B' : '__' }, sparse = self .sparse , dtype = self .dtype )
448
+ 'B' : '__' },
449
+ sparse = self .sparse ,
450
+ dtype = self .dtype )
417
451
assert_frame_equal (result , expected )
418
452
419
453
def test_dataframe_dummies_prefix_bad_length (self ):
@@ -429,7 +463,8 @@ def test_dataframe_dummies_prefix_dict(self):
429
463
df = DataFrame ({'A' : ['a' , 'b' , 'a' ],
430
464
'B' : ['b' , 'b' , 'c' ],
431
465
'C' : [1 , 2 , 3 ]}, dtype = self .dtype )
432
- result = get_dummies (df , prefix = prefixes , sparse = self .sparse , dtype = self .dtype )
466
+ result = get_dummies (df , prefix = prefixes , sparse = self .sparse ,
467
+ dtype = self .dtype )
433
468
expected = DataFrame ({'from_A_a' : [1 , 0 , 1 ],
434
469
'from_A_b' : [0 , 1 , 0 ],
435
470
'from_B_b' : [1 , 1 , 0 ],
@@ -440,7 +475,8 @@ def test_dataframe_dummies_prefix_dict(self):
440
475
def test_dataframe_dummies_with_na (self ):
441
476
df = self .df
442
477
df .loc [3 , :] = [np .nan , np .nan , np .nan ]
443
- result = get_dummies (df , dummy_na = True , sparse = self .sparse , dtype = self .dtype )
478
+ result = get_dummies (df , dummy_na = True , sparse = self .sparse ,
479
+ dtype = self .dtype )
444
480
expected = DataFrame ({'C' : [1 , 2 , 3 , np .nan ],
445
481
'A_a' : [1 , 0 , 1 , 0 ],
446
482
'A_b' : [0 , 1 , 0 , 0 ],
@@ -449,10 +485,12 @@ def test_dataframe_dummies_with_na(self):
449
485
'B_c' : [0 , 0 , 1 , 0 ],
450
486
'B_nan' : [0 , 0 , 0 , 1 ]}, dtype = self .dtype )
451
487
expected [['C' ]] = expected [['C' ]].astype (np .float64 )
452
- expected = expected [['C' , 'A_a' , 'A_b' , 'A_nan' , 'B_b' , 'B_c' , 'B_nan' ]]
488
+ expected = expected [['C' , 'A_a' , 'A_b' , 'A_nan' ,
489
+ 'B_b' , 'B_c' , 'B_nan' ]]
453
490
assert_frame_equal (result , expected )
454
491
455
- result = get_dummies (df , dummy_na = False , sparse = self .sparse , dtype = self .dtype )
492
+ result = get_dummies (df , dummy_na = False , sparse = self .sparse ,
493
+ dtype = self .dtype )
456
494
expected = expected [['C' , 'A_a' , 'A_b' , 'B_b' , 'B_c' ]]
457
495
assert_frame_equal (result , expected )
458
496
@@ -467,7 +505,6 @@ def test_dataframe_dummies_with_categorical(self):
467
505
'B_c' : [0 , 0 , 1 ],
468
506
'cat_x' : [1 , 0 , 0 ],
469
507
'cat_y' : [0 , 1 , 1 ]}, dtype = self .dtype )
470
- cols = ['A_a' , 'A_b' , 'B_b' , 'B_c' , 'cat_x' , 'cat_y' ]
471
508
expected = expected [['C' , 'A_a' , 'A_b' , 'B_b' , 'B_c' ,
472
509
'cat_x' , 'cat_y' ]]
473
510
assert_frame_equal (result , expected )
@@ -482,10 +519,16 @@ def test_basic_drop_first(self):
482
519
expected = DataFrame ({'b' : [0 , 1 , 0 ],
483
520
'c' : [0 , 0 , 1 ]}, dtype = self .dtype )
484
521
485
- result = get_dummies (s_list , sparse = self .sparse , drop_first = True , dtype = self .dtype )
522
+ result = get_dummies (s_list ,
523
+ sparse = self .sparse ,
524
+ drop_first = True ,
525
+ dtype = self .dtype )
486
526
assert_frame_equal (result , expected )
487
527
488
- result = get_dummies (s_series , sparse = self .sparse , drop_first = True , dtype = self .dtype )
528
+ result = get_dummies (s_series ,
529
+ sparse = self .sparse ,
530
+ drop_first = True ,
531
+ dtype = self .dtype )
489
532
assert_frame_equal (result , expected )
490
533
491
534
expected .index = list ('ABC' )
@@ -515,7 +558,10 @@ def test_basic_drop_first_one_level(self):
515
558
def test_basic_drop_first_NA (self ):
516
559
# Test NA handling together with drop_first
517
560
s_NA = ['a' , 'b' , np .nan ]
518
- res = get_dummies (s_NA , sparse = self .sparse , drop_first = True , dtype = self .dtype )
561
+ res = get_dummies (s_NA ,
562
+ sparse = self .sparse ,
563
+ drop_first = True ,
564
+ dtype = self .dtype )
519
565
exp = DataFrame ({'b' : [0 , 1 , 0 ]}, dtype = self .dtype )
520
566
assert_frame_equal (res , exp )
521
567
@@ -533,15 +579,21 @@ def test_basic_drop_first_NA(self):
533
579
534
580
def test_dataframe_dummies_drop_first (self ):
535
581
df = self .df [['A' , 'B' ]]
536
- result = get_dummies (df , sparse = self .sparse , drop_first = True , dtype = self .dtype )
582
+ result = get_dummies (df ,
583
+ sparse = self .sparse ,
584
+ drop_first = True ,
585
+ dtype = self .dtype )
537
586
expected = DataFrame ({'A_b' : [0 , 1 , 0 ],
538
587
'B_c' : [0 , 0 , 1 ]}, dtype = self .dtype )
539
588
assert_frame_equal (result , expected )
540
589
541
590
def test_dataframe_dummies_drop_first_with_categorical (self ):
542
591
df = self .df
543
592
df ['cat' ] = pd .Categorical (['x' , 'y' , 'y' ])
544
- result = get_dummies (df , sparse = self .sparse , drop_first = True , dtype = self .dtype )
593
+ result = get_dummies (df ,
594
+ sparse = self .sparse ,
595
+ drop_first = True ,
596
+ dtype = self .dtype )
545
597
expected = DataFrame ({'C' : [1 , 2 , 3 ],
546
598
'A_b' : [0 , 1 , 0 ],
547
599
'B_c' : [0 , 0 , 1 ],
@@ -633,6 +685,7 @@ class TestGetDummiesSparse(TestGetDummies):
633
685
def test_include_na (self ):
634
686
super (TestGetDummiesSparse , self ).test_include_na ()
635
687
688
+
636
689
class TestGetDummiesDtypeMixin (object ):
637
690
dtype_str = 'float64'
638
691
@@ -652,12 +705,16 @@ def test_dataframe_dummies_prefix_sep_bad_length(self):
652
705
def test_basic_drop_first_one_level (self ):
653
706
pass
654
707
708
+
655
709
class TestGetDummiesDtypeFloat (TestGetDummiesDtypeMixin , TestGetDummies ):
656
710
pass
657
711
658
- class TestGetDummiesSparseDtypeFloat (TestGetDummiesDtypeMixin , TestGetDummiesSparse ):
712
+
713
+ class TestGetDummiesSparseDtypeFloat (TestGetDummiesDtypeMixin ,
714
+ TestGetDummiesSparse ):
659
715
pass
660
716
717
+
661
718
class TestMakeAxisDummies (object ):
662
719
663
720
def test_preserve_categorical_dtype (self ):
0 commit comments