@@ -273,19 +273,26 @@ async def all_collections(self, username_header: dict, **kwargs) -> Collections:
273
273
collections = []
274
274
275
275
while True :
276
- temp_collections , next_token = await self .database .get_all_collections (
277
- token = token , limit = limit , base_url = base_url
276
+ temp_collections , next_token , hit_tokens = (
277
+ await self .database .get_all_collections (
278
+ token = token , limit = limit , base_url = base_url
279
+ )
278
280
)
279
281
280
- for collection in temp_collections :
282
+ for i , (collection , hit_token ) in enumerate (
283
+ zip (temp_collections , hit_tokens )
284
+ ):
281
285
# Get access control array for each collection
282
286
access_control = collection ["access_control" ]
283
287
collection .pop ("access_control" )
284
288
# Append collection to list if user has access
285
289
if int (access_control [- 1 ]) or int (access_control [user_index ]):
286
290
collections .append (collection )
287
291
if len (collections ) >= limit :
288
- break
292
+ # Extract token from last result
293
+ if i < len (temp_collections ) - 1 :
294
+ next_token = hit_token
295
+ break
289
296
290
297
# If collections now less than limit and more results, will need to run search again, giving next_token
291
298
if len (collections ) >= limit or not next_token :
@@ -338,29 +345,33 @@ async def all_catalogs(
338
345
339
346
while True :
340
347
# Search is run continually until limit is reached or no more results
341
- temp_catalogs , next_token = await self .database .get_all_catalogs (
342
- catalog_path = catalog_path ,
343
- token = token ,
344
- limit = limit ,
345
- base_url = base_url ,
346
- user_index = user_index ,
347
- conformance_classes = self .conformance_classes (),
348
+ temp_catalogs , next_token , hit_tokens = (
349
+ await self .database .get_all_catalogs (
350
+ catalog_path = catalog_path ,
351
+ token = token ,
352
+ limit = limit ,
353
+ base_url = base_url ,
354
+ user_index = user_index ,
355
+ conformance_classes = self .conformance_classes (),
356
+ )
348
357
)
349
358
350
- for catalog in temp_catalogs :
359
+ for i , ( catalog , hit_token ) in enumerate ( zip ( temp_catalogs , hit_tokens )) :
351
360
# Get access control array for each catalog
352
361
access_control = catalog ["access_control" ]
353
362
catalog .pop ("access_control" )
354
363
# Add catalog to list if user has access
355
364
if int (access_control [- 1 ]) or int (access_control [user_index ]):
356
365
catalogs .append (catalog )
357
366
if len (catalogs ) >= limit :
358
- break
367
+ if i < len (temp_catalogs ) - 1 :
368
+ # Extract token from last result
369
+ next_token = hit_token
370
+ break
359
371
360
372
# If catalogs now less than limit and more results, will need to run search again, giving next_token
361
373
if len (catalogs ) >= limit or not next_token :
362
374
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
363
- next_token = token
364
375
break
365
376
token = next_token
366
377
@@ -465,7 +476,7 @@ async def get_catalog(
465
476
)
466
477
467
478
# Assume at most 100 collections in a catalog for the time being, may need to increase
468
- collections , _ = await self .database .get_catalog_collections (
479
+ collections , _ , _ = await self .database .get_catalog_collections (
469
480
catalog_path = catalog_path ,
470
481
base_url = base_url ,
471
482
limit = NUMBER_OF_CATALOG_COLLECTIONS ,
@@ -560,28 +571,34 @@ async def get_catalog_collections(
560
571
collections = []
561
572
562
573
while True :
563
- temp_collections , next_token = await self .database .get_catalog_collections (
564
- catalog_path = catalog_path ,
565
- token = token , # type: ignore
566
- limit = limit ,
567
- base_url = base_url ,
574
+ temp_collections , next_token , hit_tokens = (
575
+ await self .database .get_catalog_collections (
576
+ catalog_path = catalog_path ,
577
+ token = token , # type: ignore
578
+ limit = limit ,
579
+ base_url = base_url ,
580
+ )
568
581
)
569
582
570
583
# Check if current user has access to each collection
571
- for collection in temp_collections :
584
+ for i , (collection , hit_token ) in enumerate (
585
+ zip (temp_collections , hit_tokens )
586
+ ):
572
587
# Get access control array for each collection
573
588
access_control = collection ["access_control" ]
574
589
collection .pop ("access_control" )
575
590
# Remove collection from list if user does not have access
576
591
if int (access_control [- 1 ]) or int (access_control [user_index ]):
577
592
collections .append (collection )
578
593
if len (collections ) >= limit :
579
- break
594
+ if i < len (temp_collections ) - 1 :
595
+ # Extract token from last result
596
+ next_token = hit_token
597
+ break
580
598
581
599
# If collections now less than limit and more results, will need to run search again, giving next_token
582
600
if len (collections ) >= limit or not next_token :
583
601
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
584
- next_token = token
585
602
break
586
603
token = next_token
587
604
@@ -674,7 +691,8 @@ async def item_collection(
674
691
675
692
search = self .database .apply_bbox_filter (search = search , bbox = bbox )
676
693
677
- items , maybe_count , next_token = await self .database .execute_search (
694
+ # No further access control needed as already checked above for collection
695
+ items , maybe_count , next_token , _ = await self .database .execute_search (
678
696
search = search ,
679
697
catalog_paths = [catalog_path ],
680
698
limit = limit ,
@@ -1046,17 +1064,19 @@ async def post_global_search(
1046
1064
items = []
1047
1065
1048
1066
while True :
1049
- temp_items , maybe_count , next_token = await self .database .execute_search (
1050
- search = search ,
1051
- limit = limit ,
1052
- token = token , # type: ignore
1053
- sort = sort ,
1054
- collection_ids = search_request .collections ,
1055
- catalog_paths = search_request .catalog_paths ,
1067
+ temp_items , maybe_count , next_token , hit_tokens = (
1068
+ await self .database .execute_search (
1069
+ search = search ,
1070
+ limit = limit ,
1071
+ token = token , # type: ignore
1072
+ sort = sort ,
1073
+ collection_ids = search_request .collections ,
1074
+ catalog_paths = search_request .catalog_paths ,
1075
+ )
1056
1076
)
1057
1077
1058
1078
# Filter results to those that are accessible to the user
1059
- for item in temp_items :
1079
+ for i , ( item , hit_token ) in enumerate ( zip ( temp_items , hit_tokens )) :
1060
1080
# Get item index for path extraction
1061
1081
item_catalog_path = item [1 ]
1062
1082
# Get parent collection if collection is present
@@ -1073,7 +1093,10 @@ async def post_global_search(
1073
1093
if int (access_control [- 1 ]) or int (access_control [user_index ]):
1074
1094
items .append (item )
1075
1095
if len (items ) >= limit :
1076
- break
1096
+ if i < len (temp_items ) - 1 :
1097
+ # Extract token from last result
1098
+ next_token = hit_token
1099
+ break
1077
1100
# Get parent catalog if collection is not present
1078
1101
else :
1079
1102
# Get access control array for this catalog
@@ -1085,12 +1108,14 @@ async def post_global_search(
1085
1108
if int (access_control [- 1 ]) or int (access_control [user_index ]):
1086
1109
items .append (item )
1087
1110
if len (items ) >= limit :
1088
- break
1111
+ # Extract token from last result
1112
+ if i < len (temp_items ) - 1 :
1113
+ next_token = hit_token
1114
+ break
1089
1115
1090
1116
# If items now less than limit and more results, will need to run search again, giving next_token
1091
1117
if len (items ) >= limit or not next_token :
1092
1118
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
1093
- next_token = token
1094
1119
break
1095
1120
token = next_token
1096
1121
@@ -1264,7 +1289,6 @@ async def post_search(
1264
1289
username_header : dict ,
1265
1290
** kwargs ,
1266
1291
) -> ItemCollection :
1267
- print ("post search" )
1268
1292
"""
1269
1293
Perform a POST search on a specific sub-catalog.
1270
1294
@@ -1299,9 +1323,12 @@ async def post_search(
1299
1323
raise HTTPException (
1300
1324
status_code = 403 , detail = "User does not have access to this Catalog"
1301
1325
)
1326
+ collections = []
1327
+ if search_request .collections :
1328
+ collections = search_request .collections
1302
1329
1303
1330
# Filter the search collections to those that are accessible to the user
1304
- for collection_id in search_request . collections [:]:
1331
+ for collection_id in collections [:]:
1305
1332
# Filter the search catalogs to those that are accessible to the user
1306
1333
collection = await self .database .find_collection (
1307
1334
catalog_path = catalog_path , collection_id = collection_id
@@ -1310,7 +1337,7 @@ async def post_search(
1310
1337
access_control = collection ["access_control" ]
1311
1338
# Remove catalog from list if user does not have access
1312
1339
if not int (access_control [- 1 ]) and not int (access_control [user_index ]):
1313
- search_request . collections .remove (collection_id )
1340
+ collections .remove (collection_id )
1314
1341
1315
1342
search = self .database .make_search ()
1316
1343
@@ -1319,9 +1346,9 @@ async def post_search(
1319
1346
search = search , item_ids = search_request .ids
1320
1347
)
1321
1348
1322
- if search_request . collections :
1349
+ if collections :
1323
1350
search = self .database .apply_collections_filter (
1324
- search = search , collection_ids = search_request . collections
1351
+ search = search , collection_ids = collections
1325
1352
)
1326
1353
1327
1354
if search_request .datetime :
@@ -1375,17 +1402,19 @@ async def post_search(
1375
1402
items = []
1376
1403
1377
1404
while True :
1378
- temp_items , maybe_count , next_token = await self .database .execute_search (
1379
- search = search ,
1380
- limit = limit ,
1381
- token = token , # type: ignore
1382
- sort = sort ,
1383
- collection_ids = search_request .collections ,
1384
- catalog_paths = [catalog_path ],
1405
+ temp_items , maybe_count , next_token , hit_tokens = (
1406
+ await self .database .execute_search (
1407
+ search = search ,
1408
+ limit = limit ,
1409
+ token = token , # type: ignore
1410
+ sort = sort ,
1411
+ collection_ids = collections ,
1412
+ catalog_paths = [catalog_path ],
1413
+ )
1385
1414
)
1386
1415
1387
1416
# Filter results to those that are accessible to the user
1388
- for item in temp_items :
1417
+ for i , ( item , hit_token ) in enumerate ( zip ( temp_items , hit_tokens )) :
1389
1418
# Get item index for path extraction
1390
1419
item_catalog_path = item [1 ]
1391
1420
# Get parent collection if collection is present
@@ -1401,6 +1430,11 @@ async def post_search(
1401
1430
# Append item to list if user has access
1402
1431
if int (access_control [- 1 ]) or int (access_control [user_index ]):
1403
1432
items .append (item )
1433
+ if len (items ) >= limit :
1434
+ if i < len (temp_items ) - 1 :
1435
+ # Extract token from last result
1436
+ next_token = hit_token
1437
+ break
1404
1438
# Get parent catalog if collection is not present
1405
1439
else :
1406
1440
# Get access control array for this catalog
@@ -1411,11 +1445,15 @@ async def post_search(
1411
1445
# Append item to list if user has access
1412
1446
if int (access_control [- 1 ]) or int (access_control [user_index ]):
1413
1447
items .append (item )
1448
+ if len (items ) >= limit :
1449
+ if i < len (temp_items ) - 1 :
1450
+ # Extract token from last result
1451
+ next_token = hit_token
1452
+ break
1414
1453
1415
1454
# If items now less than limit and more results, will need to run search again, giving next_token
1416
1455
if len (items ) >= limit or not next_token :
1417
1456
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
1418
- next_token = token
1419
1457
break
1420
1458
token = next_token
1421
1459
@@ -2254,7 +2292,7 @@ async def post_all_collections(
2254
2292
collections = []
2255
2293
2256
2294
while True :
2257
- temp_collections , _ , next_token = (
2295
+ temp_collections , _ , next_token , hit_tokens = (
2258
2296
await self .database .execute_collection_search (
2259
2297
search = search ,
2260
2298
limit = limit ,
@@ -2265,18 +2303,24 @@ async def post_all_collections(
2265
2303
)
2266
2304
2267
2305
# Filter results to those that are accessible to the user
2268
- for collection in temp_collections :
2306
+ for i , (collection , hit_token ) in enumerate (
2307
+ zip (temp_collections , hit_tokens )
2308
+ ):
2269
2309
# Get access control array for this collection
2270
2310
access_control = collection ["access_control" ]
2271
2311
collection .pop ("access_control" )
2272
2312
# Append collection to list if user has access
2273
2313
if int (access_control [- 1 ]) or int (access_control [user_index ]):
2274
2314
collections .append (collection )
2315
+ if len (collections ) >= limit :
2316
+ if i < len (temp_collections ) - 1 :
2317
+ # Extract token from last result
2318
+ next_token = hit_token
2319
+ break
2275
2320
2276
2321
# If collections now less than limit and more results, will need to run search again, giving next_token
2277
2322
if len (collections ) >= limit or not next_token :
2278
2323
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
2279
- next_token = token
2280
2324
break
2281
2325
token = next_token
2282
2326
@@ -2411,7 +2455,7 @@ async def post_discovery_search(
2411
2455
catalogs_and_collections = []
2412
2456
2413
2457
while True :
2414
- temp_catalogs_and_collections , maybe_count , next_token = (
2458
+ temp_catalogs_and_collections , _ , next_token , hit_tokens = (
2415
2459
await self .database .execute_discovery_search (
2416
2460
search = search ,
2417
2461
limit = limit ,
@@ -2423,18 +2467,24 @@ async def post_discovery_search(
2423
2467
)
2424
2468
2425
2469
# Filter results to those that are accessible to the user
2426
- for data in temp_catalogs_and_collections :
2470
+ for i , (data , hit_token ) in enumerate (
2471
+ zip (temp_catalogs_and_collections , hit_tokens )
2472
+ ):
2427
2473
# Get access control array for this collection
2428
2474
access_control = data ["access_control" ]
2429
2475
data .pop ("access_control" )
2430
2476
# Append collection to list if user has access
2431
2477
if int (access_control [- 1 ]) or int (access_control [user_index ]):
2432
2478
catalogs_and_collections .append (data )
2479
+ if len (catalogs_and_collections ) >= limit :
2480
+ if i < len (temp_catalogs_and_collections ) - 1 :
2481
+ # Extract token from last result
2482
+ next_token = hit_token
2483
+ break
2433
2484
2434
2485
# If catalogs_and_collections now less than limit and more results, will need to run search again, giving next_token
2435
2486
if len (catalogs_and_collections ) >= limit or not next_token :
2436
2487
# NOTE(review): the hit_token handling in the loop above appears to implement the former TODO (return token of last returned ES entry) — confirm and drop this note
2437
- next_token = token
2438
2488
break
2439
2489
token = next_token
2440
2490
0 commit comments