Skip to content

Commit 2c1e095

Browse files
Merge pull request #23 from EO-DataHub/bugfix/EODHP-549-fastapi-ensure-next-link-only-included-when-subsequent-results-exist
Bugfix/eodhp 549 fastapi ensure next link only included when subsequent results exist
2 parents 2ce2c33 + 62d19a2 commit 2c1e095

File tree

2 files changed

+222
-104
lines changed

2 files changed

+222
-104
lines changed

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 105 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -273,19 +273,26 @@ async def all_collections(self, username_header: dict, **kwargs) -> Collections:
273273
collections = []
274274

275275
while True:
276-
temp_collections, next_token = await self.database.get_all_collections(
277-
token=token, limit=limit, base_url=base_url
276+
temp_collections, next_token, hit_tokens = (
277+
await self.database.get_all_collections(
278+
token=token, limit=limit, base_url=base_url
279+
)
278280
)
279281

280-
for collection in temp_collections:
282+
for i, (collection, hit_token) in enumerate(
283+
zip(temp_collections, hit_tokens)
284+
):
281285
# Get access control array for each collection
282286
access_control = collection["access_control"]
283287
collection.pop("access_control")
284288
# Append collection to list if user has access
285289
if int(access_control[-1]) or int(access_control[user_index]):
286290
collections.append(collection)
287291
if len(collections) >= limit:
288-
break
292+
# Extract token from last result
293+
if i < len(temp_collections) - 1:
294+
next_token = hit_token
295+
break
289296

290297
# If collections now less than limit and more results, will need to run search again, giving next_token
291298
if len(collections) >= limit or not next_token:
@@ -338,29 +345,33 @@ async def all_catalogs(
338345

339346
while True:
340347
# Search is run continually until limit is reached or no more results
341-
temp_catalogs, next_token = await self.database.get_all_catalogs(
342-
catalog_path=catalog_path,
343-
token=token,
344-
limit=limit,
345-
base_url=base_url,
346-
user_index=user_index,
347-
conformance_classes=self.conformance_classes(),
348+
temp_catalogs, next_token, hit_tokens = (
349+
await self.database.get_all_catalogs(
350+
catalog_path=catalog_path,
351+
token=token,
352+
limit=limit,
353+
base_url=base_url,
354+
user_index=user_index,
355+
conformance_classes=self.conformance_classes(),
356+
)
348357
)
349358

350-
for catalog in temp_catalogs:
359+
for i, (catalog, hit_token) in enumerate(zip(temp_catalogs, hit_tokens)):
351360
# Get access control array for each catalog
352361
access_control = catalog["access_control"]
353362
catalog.pop("access_control")
354363
# Add catalog to list if user has access
355364
if int(access_control[-1]) or int(access_control[user_index]):
356365
catalogs.append(catalog)
357366
if len(catalogs) >= limit:
358-
break
367+
if i < len(temp_catalogs) - 1:
368+
# Extract token from last result
369+
next_token = hit_token
370+
break
359371

360372
# If catalogs now less than limit and more results, will need to run search again, giving next_token
361373
if len(catalogs) >= limit or not next_token:
362374
# TODO: implement smarter token logic to return token of last returned ES entry
363-
next_token = token
364375
break
365376
token = next_token
366377

@@ -465,7 +476,7 @@ async def get_catalog(
465476
)
466477

467478
# Assume at most 100 collections in a catalog for the time being, may need to increase
468-
collections, _ = await self.database.get_catalog_collections(
479+
collections, _, _ = await self.database.get_catalog_collections(
469480
catalog_path=catalog_path,
470481
base_url=base_url,
471482
limit=NUMBER_OF_CATALOG_COLLECTIONS,
@@ -560,28 +571,34 @@ async def get_catalog_collections(
560571
collections = []
561572

562573
while True:
563-
temp_collections, next_token = await self.database.get_catalog_collections(
564-
catalog_path=catalog_path,
565-
token=token, # type: ignore
566-
limit=limit,
567-
base_url=base_url,
574+
temp_collections, next_token, hit_tokens = (
575+
await self.database.get_catalog_collections(
576+
catalog_path=catalog_path,
577+
token=token, # type: ignore
578+
limit=limit,
579+
base_url=base_url,
580+
)
568581
)
569582

570583
# Check if current user has access to each collection
571-
for collection in temp_collections:
584+
for i, (collection, hit_token) in enumerate(
585+
zip(temp_collections, hit_tokens)
586+
):
572587
# Get access control array for each collection
573588
access_control = collection["access_control"]
574589
collection.pop("access_control")
575590
# Remove collection from list if user does not have access
576591
if int(access_control[-1]) or int(access_control[user_index]):
577592
collections.append(collection)
578593
if len(collections) >= limit:
579-
break
594+
if i < len(temp_collections) - 1:
595+
# Extract token from last result
596+
next_token = hit_token
597+
break
580598

581599
# If collections now less than limit and more results, will need to run search again, giving next_token
582600
if len(collections) >= limit or not next_token:
583601
# TODO: implement smarter token logic to return token of last returned ES entry
584-
next_token = token
585602
break
586603
token = next_token
587604

@@ -674,7 +691,8 @@ async def item_collection(
674691

675692
search = self.database.apply_bbox_filter(search=search, bbox=bbox)
676693

677-
items, maybe_count, next_token = await self.database.execute_search(
694+
# No further access control needed as already checked above for collection
695+
items, maybe_count, next_token, _ = await self.database.execute_search(
678696
search=search,
679697
catalog_paths=[catalog_path],
680698
limit=limit,
@@ -1046,17 +1064,19 @@ async def post_global_search(
10461064
items = []
10471065

10481066
while True:
1049-
temp_items, maybe_count, next_token = await self.database.execute_search(
1050-
search=search,
1051-
limit=limit,
1052-
token=token, # type: ignore
1053-
sort=sort,
1054-
collection_ids=search_request.collections,
1055-
catalog_paths=search_request.catalog_paths,
1067+
temp_items, maybe_count, next_token, hit_tokens = (
1068+
await self.database.execute_search(
1069+
search=search,
1070+
limit=limit,
1071+
token=token, # type: ignore
1072+
sort=sort,
1073+
collection_ids=search_request.collections,
1074+
catalog_paths=search_request.catalog_paths,
1075+
)
10561076
)
10571077

10581078
# Filter results to those that are accessible to the user
1059-
for item in temp_items:
1079+
for i, (item, hit_token) in enumerate(zip(temp_items, hit_tokens)):
10601080
# Get item index for path extraction
10611081
item_catalog_path = item[1]
10621082
# Get parent collection if collection is present
@@ -1073,7 +1093,10 @@ async def post_global_search(
10731093
if int(access_control[-1]) or int(access_control[user_index]):
10741094
items.append(item)
10751095
if len(items) >= limit:
1076-
break
1096+
if i < len(temp_items) - 1:
1097+
# Extract token from last result
1098+
next_token = hit_token
1099+
break
10771100
# Get parent catalog if collection is not present
10781101
else:
10791102
# Get access control array for this catalog
@@ -1085,12 +1108,14 @@ async def post_global_search(
10851108
if int(access_control[-1]) or int(access_control[user_index]):
10861109
items.append(item)
10871110
if len(items) >= limit:
1088-
break
1111+
# Extract token from last result
1112+
if i < len(temp_items) - 1:
1113+
next_token = hit_token
1114+
break
10891115

10901116
# If items now less than limit and more results, will need to run search again, giving next_token
10911117
if len(items) >= limit or not next_token:
10921118
# TODO: implement smarter token logic to return token of last returned ES entry
1093-
next_token = token
10941119
break
10951120
token = next_token
10961121

@@ -1264,7 +1289,6 @@ async def post_search(
12641289
username_header: dict,
12651290
**kwargs,
12661291
) -> ItemCollection:
1267-
print("post search")
12681292
"""
12691293
Perform a POST search on a specific sub-catalog.
12701294
@@ -1299,9 +1323,12 @@ async def post_search(
12991323
raise HTTPException(
13001324
status_code=403, detail="User does not have access to this Catalog"
13011325
)
1326+
collections = []
1327+
if search_request.collections:
1328+
collections = search_request.collections
13021329

13031330
# Filter the search collections to those that are accessible to the user
1304-
for collection_id in search_request.collections[:]:
1331+
for collection_id in collections[:]:
13051332
# Filter the search catalogs to those that are accessible to the user
13061333
collection = await self.database.find_collection(
13071334
catalog_path=catalog_path, collection_id=collection_id
@@ -1310,7 +1337,7 @@ async def post_search(
13101337
access_control = collection["access_control"]
13111338
# Remove catalog from list if user does not have access
13121339
if not int(access_control[-1]) and not int(access_control[user_index]):
1313-
search_request.collections.remove(collection_id)
1340+
collections.remove(collection_id)
13141341

13151342
search = self.database.make_search()
13161343

@@ -1319,9 +1346,9 @@ async def post_search(
13191346
search=search, item_ids=search_request.ids
13201347
)
13211348

1322-
if search_request.collections:
1349+
if collections:
13231350
search = self.database.apply_collections_filter(
1324-
search=search, collection_ids=search_request.collections
1351+
search=search, collection_ids=collections
13251352
)
13261353

13271354
if search_request.datetime:
@@ -1375,17 +1402,19 @@ async def post_search(
13751402
items = []
13761403

13771404
while True:
1378-
temp_items, maybe_count, next_token = await self.database.execute_search(
1379-
search=search,
1380-
limit=limit,
1381-
token=token, # type: ignore
1382-
sort=sort,
1383-
collection_ids=search_request.collections,
1384-
catalog_paths=[catalog_path],
1405+
temp_items, maybe_count, next_token, hit_tokens = (
1406+
await self.database.execute_search(
1407+
search=search,
1408+
limit=limit,
1409+
token=token, # type: ignore
1410+
sort=sort,
1411+
collection_ids=collections,
1412+
catalog_paths=[catalog_path],
1413+
)
13851414
)
13861415

13871416
# Filter results to those that are accessible to the user
1388-
for item in temp_items:
1417+
for i, (item, hit_token) in enumerate(zip(temp_items, hit_tokens)):
13891418
# Get item index for path extraction
13901419
item_catalog_path = item[1]
13911420
# Get parent collection if collection is present
@@ -1401,6 +1430,11 @@ async def post_search(
14011430
# Append item to list if user has access
14021431
if int(access_control[-1]) or int(access_control[user_index]):
14031432
items.append(item)
1433+
if len(items) >= limit:
1434+
if i < len(temp_items) - 1:
1435+
# Extract token from last result
1436+
next_token = hit_token
1437+
break
14041438
# Get parent catalog if collection is not present
14051439
else:
14061440
# Get access control array for this catalog
@@ -1411,11 +1445,15 @@ async def post_search(
14111445
# Append item to list if user has access
14121446
if int(access_control[-1]) or int(access_control[user_index]):
14131447
items.append(item)
1448+
if len(items) >= limit:
1449+
if i < len(temp_items) - 1:
1450+
# Extract token from last result
1451+
next_token = hit_token
1452+
break
14141453

14151454
# If items now less than limit and more results, will need to run search again, giving next_token
14161455
if len(items) >= limit or not next_token:
14171456
# TODO: implement smarter token logic to return token of last returned ES entry
1418-
next_token = token
14191457
break
14201458
token = next_token
14211459

@@ -2254,7 +2292,7 @@ async def post_all_collections(
22542292
collections = []
22552293

22562294
while True:
2257-
temp_collections, _, next_token = (
2295+
temp_collections, _, next_token, hit_tokens = (
22582296
await self.database.execute_collection_search(
22592297
search=search,
22602298
limit=limit,
@@ -2265,18 +2303,24 @@ async def post_all_collections(
22652303
)
22662304

22672305
# Filter results to those that are accessible to the user
2268-
for collection in temp_collections:
2306+
for i, (collection, hit_token) in enumerate(
2307+
zip(temp_collections, hit_tokens)
2308+
):
22692309
# Get access control array for this collection
22702310
access_control = collection["access_control"]
22712311
collection.pop("access_control")
22722312
# Append collection to list if user has access
22732313
if int(access_control[-1]) or int(access_control[user_index]):
22742314
collections.append(collection)
2315+
if len(collections) >= limit:
2316+
if i < len(temp_collections) - 1:
2317+
# Extract token from last result
2318+
next_token = hit_token
2319+
break
22752320

22762321
# If collections now less than limit and more results, will need to run search again, giving next_token
22772322
if len(collections) >= limit or not next_token:
22782323
# TODO: implement smarter token logic to return token of last returned ES entry
2279-
next_token = token
22802324
break
22812325
token = next_token
22822326

@@ -2411,7 +2455,7 @@ async def post_discovery_search(
24112455
catalogs_and_collections = []
24122456

24132457
while True:
2414-
temp_catalogs_and_collections, maybe_count, next_token = (
2458+
temp_catalogs_and_collections, _, next_token, hit_tokens = (
24152459
await self.database.execute_discovery_search(
24162460
search=search,
24172461
limit=limit,
@@ -2423,18 +2467,24 @@ async def post_discovery_search(
24232467
)
24242468

24252469
# Filter results to those that are accessible to the user
2426-
for data in temp_catalogs_and_collections:
2470+
for i, (data, hit_token) in enumerate(
2471+
zip(temp_catalogs_and_collections, hit_tokens)
2472+
):
24272473
# Get access control array for this collection
24282474
access_control = data["access_control"]
24292475
data.pop("access_control")
24302476
# Append collection to list if user has access
24312477
if int(access_control[-1]) or int(access_control[user_index]):
24322478
catalogs_and_collections.append(data)
2479+
if len(catalogs_and_collections) >= limit:
2480+
if i < len(temp_catalogs_and_collections) - 1:
2481+
# Extract token from last result
2482+
next_token = hit_token
2483+
break
24332484

24342485
# If catalogs_and_collections now less than limit and more results, will need to run search again, giving next_token
24352486
if len(catalogs_and_collections) >= limit or not next_token:
24362487
# TODO: implement smarter token logic to return token of last returned ES entry
2437-
next_token = token
24382488
break
24392489
token = next_token
24402490

0 commit comments

Comments
 (0)