@@ -1136,14 +1136,14 @@ def test_join_multi_levels(self):
1136
1136
1137
1137
def f ():
1138
1138
household .join (portfolio , how = 'inner' )
1139
- pytest . raises (ValueError , f )
1139
+ self . assertRaises (ValueError , f )
1140
1140
1141
1141
portfolio2 = portfolio .copy ()
1142
1142
portfolio2 .index .set_names (['household_id' , 'foo' ])
1143
1143
1144
1144
def f ():
1145
1145
portfolio2 .join (portfolio , how = 'inner' )
1146
- pytest . raises (ValueError , f )
1146
+ self . assertRaises (ValueError , f )
1147
1147
1148
1148
def test_join_multi_levels2 (self ):
1149
1149
@@ -1182,11 +1182,7 @@ def test_join_multi_levels2(self):
1182
1182
.set_index (["household_id" , "asset_id" , "t" ])
1183
1183
.reindex (columns = ['share' , 'log_return' ]))
1184
1184
1185
- def f ():
1186
- household .join (log_return , how = 'inner' )
1187
- pytest .raises (NotImplementedError , f )
1188
-
1189
- # this is the equivalency
1185
+ # this is the equivalency
1190
1186
result = (merge (household .reset_index (), log_return .reset_index (),
1191
1187
on = ['asset_id' ], how = 'inner' )
1192
1188
.set_index (['household_id' , 'asset_id' , 't' ]))
@@ -1195,7 +1191,7 @@ def f():
1195
1191
expected = (
1196
1192
DataFrame (dict (
1197
1193
household_id = [1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 , 4 ],
1198
- asset_id = ["nl0000301109" , "nl0000289783 " , "gb00b03mlx29" ,
1194
+ asset_id = ["nl0000301109" , "nl0000301109 " , "gb00b03mlx29" ,
1199
1195
"gb00b03mlx29" , "gb00b03mlx29" ,
1200
1196
"gb00b03mlx29" , "gb00b03mlx29" , "gb00b03mlx29" ,
1201
1197
"lu0197800237" , "lu0197800237" ,
@@ -1208,12 +1204,179 @@ def f():
1208
1204
.09604978 , - .06524096 , .03532373 ,
1209
1205
.03025441 , .036997 , None , None ]
1210
1206
))
1211
- .set_index (["household_id" , "asset_id" , "t" ]))
1207
+ .set_index (["household_id" , "asset_id" , "t" ])
1208
+ .reindex (columns = ['share' , 'log_return' ]))
1212
1209
1213
- def f ():
1214
- household . join ( log_return , how = 'outer' )
1215
- pytest . raises ( NotImplementedError , f )
1210
+ result = ( merge ( household . reset_index (), log_return . reset_index (),
1211
+ on = [ 'asset_id' ] , how = 'outer' )
1212
+ . set_index ([ 'household_id' , 'asset_id' , 't' ]) )
1216
1213
1214
+ assert_frame_equal (result , expected )
1215
+
1216
+ def test_join_multi_levels3 (self ):
1217
+ # Multi-index join tests
1218
+ # Self join
1219
+ matrix = (
1220
+ pd .DataFrame (
1221
+ dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1222
+ Destination = [1 , 2 , 1 , 3 , 1 ],
1223
+ Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1224
+ TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1225
+ Trips = [1987 , 3647 , 2470 , 4296 , 4444 ]),
1226
+ columns = ['Origin' , 'Destination' , 'Period' ,
1227
+ 'TripPurp' , 'Trips' ])
1228
+ .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1229
+
1230
+ distances = (
1231
+ pd .DataFrame (
1232
+ dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1233
+ Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1234
+ Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1235
+ LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1236
+ Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1237
+ columns = ['Origin' , 'Destination' , 'Period' ,
1238
+ 'LinkType' , 'Distance' ])
1239
+ .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1240
+
1241
+ expected = (
1242
+ pd .DataFrame (
1243
+ dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1244
+ Destination = [1 , 2 , 1 , 3 , 1 ],
1245
+ Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1246
+ TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1247
+ Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1248
+ Trips_joined = [1987 , 3647 , 2470 , 4296 , 4444 ]),
1249
+ columns = ['Origin' , 'Destination' , 'Period' ,
1250
+ 'TripPurp' , 'Trips' , 'Trips_joined' ])
1251
+ .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1252
+
1253
+ result = matrix .join (matrix , how = 'inner' , rsuffix = '_joined' )
1254
+ assert_frame_equal (result , expected )
1255
+
1256
+ #Left join
1257
+ expected = (
1258
+ pd .DataFrame (
1259
+ dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1260
+ Destination = [1 , 2 , 1 , 3 , 1 ],
1261
+ Period = ['AM' ,'PM' ,'IP' , 'AM' , 'OP' ],
1262
+ TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1263
+ Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1264
+ Distance = [100 , 80 , 90 , np .nan , 75 ]),
1265
+ columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' ,
1266
+ 'Trips' , 'Distance' ])
1267
+ .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1268
+
1269
+ result = matrix .join (distances , how = 'left' )
1270
+ assert_frame_equal (result , expected )
1271
+
1272
+ #Right join
1273
+ expected = (
1274
+ pd .DataFrame (
1275
+ dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1276
+ Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1277
+ Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1278
+ LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1279
+ Trips = [1987 , 3647 , 2470 , np .nan , 4444 , np .nan , np .nan ],
1280
+ Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1281
+ columns = ['Origin' , 'Destination' , 'Period' ,
1282
+ 'LinkType' , 'Trips' , 'Distance' ])
1283
+ .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1284
+
1285
+ result = matrix .join (distances , how = 'right' )
1286
+ assert_frame_equal (result , expected )
1287
+
1288
+ #Inner join
1289
+ expected = (
1290
+ pd .DataFrame (
1291
+ dict (Origin = [1 , 1 , 2 , 3 ],
1292
+ Destination = [1 , 2 , 1 , 1 ],
1293
+ Period = ['AM' ,'PM' ,'IP' , 'OP' ],
1294
+ Trips = [1987 , 3647 , 2470 , 4444 ],
1295
+ Distance = [100 , 80 , 90 , 75 ]),
1296
+ columns = ['Origin' , 'Destination' , 'Period' , 'Trips' , 'Distance' ])
1297
+ .set_index (['Origin' , 'Destination' , 'Period' ]))
1298
+
1299
+ result = matrix .join (distances , how = 'inner' )
1300
+ assert_frame_equal (result , expected )
1301
+
1302
+ #Outer join
1303
+ expected = (
1304
+ pd .DataFrame (
1305
+ dict (Origin = [1 , 1 , 2 , 2 , 2 , 3 , 3 , 5 ],
1306
+ Destination = [1 , 2 , 1 , 2 , 3 , 1 , 2 , 6 ],
1307
+ Period = ['AM' ,'PM' ,'IP' , 'AM' , 'AM' , 'OP' , 'IP' , 'AM' ],
1308
+ TripPurp = ['hbw' , 'nhb' , 'hbo' , np .nan , 'nhb' ,
1309
+ 'hbw' , np .nan , np .nan ],
1310
+ LinkType = ['a' , 'a' , 'c' , 'b' , np .nan , 'a' , 'b' , 'a' ],
1311
+ Trips = [1987 , 3647 , 2470 , np .nan , 4296 , 4444 , np .nan , np .nan ],
1312
+ Distance = [100 , 80 , 90 , 80 , np .nan , 75 , 35 , 55 ]),
1313
+ columns = ['Origin' , 'Destination' , 'Period' , 'TripPurp' , 'LinkType' ,
1314
+ 'Trips' , 'Distance' ])
1315
+ .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' , 'LinkType' ]))
1316
+
1317
+
1318
+ result = matrix .join (distances , how = 'outer' )
1319
+ assert_frame_equal (result , expected )
1320
+
1321
+ #Non-unique resulting index
1322
+ distances2 = (
1323
+ pd .DataFrame (
1324
+ dict (Origin = [1 , 1 , 2 ],
1325
+ Destination = [1 , 1 , 1 ],
1326
+ Period = ['AM' ,'AM' , 'PM' ],
1327
+ LinkType = ['a' , 'b' , 'a' ],
1328
+ Distance = [100 , 110 , 120 ]),
1329
+ columns = ['Origin' , 'Destination' , 'Period' ,
1330
+ 'LinkType' , 'Distance' ])
1331
+ .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1332
+
1333
+ def f ():
1334
+ matrix .join (distances2 , how = 'left' )
1335
+ self .assertRaises (TypeError , f )
1336
+
1337
+ #Non-overlapping level names
1338
+ distances2 = (
1339
+ pd .DataFrame (
1340
+ dict (Orig = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1341
+ Dest = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1342
+ Per = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ,'IP' , 'AM' ],
1343
+ LinkTyp = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1344
+ Dist = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1345
+ columns = ['Orig' , 'Dest' , 'Per' ,
1346
+ 'LinkTyp' , 'Dist' ])
1347
+ .set_index (['Orig' , 'Dest' ,'Per' , 'LinkTyp' ]))
1348
+
1349
+ def f ():
1350
+ matrix .join (distances2 , how = 'left' )
1351
+ self .assertRaises (ValueError , f )
1352
+
1353
+ # Empty Level
1354
+ distances2 = (
1355
+ pd .DataFrame (
1356
+ dict (Origin = [1 , 1 , 2 , 2 , 3 , 3 , 5 ],
1357
+ Destination = [1 , 2 , 1 , 2 , 1 , 2 , 6 ],
1358
+ Period = [np .nan ,np .nan ,np .nan ,np .nan ,np .nan ,np .nan ,np .nan ],
1359
+ LinkType = ['a' , 'a' , 'c' , 'b' , 'a' , 'b' , 'a' ],
1360
+ Distance = [100 , 80 , 90 , 80 , 75 , 35 , 55 ]),
1361
+ columns = ['Origin' , 'Destination' , 'Period' ,
1362
+ 'LinkType' , 'Distance' ])
1363
+ .set_index (['Origin' , 'Destination' ,'Period' , 'LinkType' ]))
1364
+
1365
+
1366
+ expected = (
1367
+ pd .DataFrame (
1368
+ dict (Origin = [1 , 1 , 2 , 2 , 3 ],
1369
+ Destination = [1 , 2 , 1 , 3 , 1 ],
1370
+ Period = ['AM' ,'PM' ,'IP' ,'AM' ,'OP' ],
1371
+ TripPurp = ['hbw' , 'nhb' , 'hbo' , 'nhb' , 'hbw' ],
1372
+ Trips = [1987 , 3647 , 2470 , 4296 , 4444 ],
1373
+ Distance = [np .nan , np .nan , np .nan , np .nan , np .nan ]),
1374
+ columns = ['Origin' , 'Destination' , 'Period' ,
1375
+ 'TripPurp' , 'Trips' , 'Distance' ])
1376
+ .set_index (['Origin' , 'Destination' , 'Period' , 'TripPurp' ]))
1377
+
1378
+ result = matrix .join (distances2 , how = 'left' )
1379
+ assert_frame_equal (result , expected )
1217
1380
1218
1381
@pytest .fixture
1219
1382
def df ():
0 commit comments