Skip to content

Commit ad2dc7e

Browse files
authored
Merge pull request #196 from stac-utils/remove_source
remove db logic from all collections in core.py
2 parents 5a81078 + dba90f3 commit ad2dc7e

File tree

4 files changed

+81
-86
lines changed

4 files changed

+81
-86
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1313

1414
### Changed
1515

16+
- Removed database logic from core.py all_collections [#196](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/196)
1617
- Changed OpenSearch config ssl_version to SSLv23 [#200](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/200)
1718

1819
### Fixed

stac_fastapi/core/stac_fastapi/core/core.py

Lines changed: 18 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Item crud client."""
22
import logging
33
import re
4-
from base64 import urlsafe_b64encode
54
from datetime import datetime as datetime_type
65
from datetime import timezone
76
from typing import Any, Dict, List, Optional, Set, Type, Union
@@ -193,66 +192,36 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage:
193192
async def all_collections(self, **kwargs) -> Collections:
194193
"""Read all collections from the database.
195194
196-
Returns:
197-
Collections: A `Collections` object containing all the collections in the database and
198-
links to various resources.
195+
Args:
196+
**kwargs: Keyword arguments from the request.
199197
200-
Raises:
201-
Exception: If any error occurs while reading the collections from the database.
198+
Returns:
199+
A Collections object containing all the collections in the database and links to various resources.
202200
"""
203-
request: Request = kwargs["request"]
204-
base_url = str(kwargs["request"].base_url)
201+
request = kwargs["request"]
202+
base_url = str(request.base_url)
203+
limit = int(request.query_params.get("limit", 10))
204+
token = request.query_params.get("token")
205205

206-
limit = (
207-
int(request.query_params["limit"])
208-
if "limit" in request.query_params
209-
else 10
210-
)
211-
token = (
212-
request.query_params["token"] if "token" in request.query_params else None
206+
collections, next_token = await self.database.get_all_collections(
207+
token=token, limit=limit, base_url=base_url
213208
)
214209

215-
hits = await self.database.get_all_collections(limit=limit, token=token)
216-
217-
next_search_after = None
218-
next_link = None
219-
if len(hits) == limit:
220-
last_hit = hits[-1]
221-
next_search_after = last_hit["sort"]
222-
next_token = urlsafe_b64encode(
223-
",".join(map(str, next_search_after)).encode()
224-
).decode()
225-
paging_links = PagingLinks(next=next_token, request=request)
226-
next_link = paging_links.link_next()
227-
228210
links = [
229-
{
230-
"rel": Relations.root.value,
231-
"type": MimeTypes.json,
232-
"href": base_url,
233-
},
234-
{
235-
"rel": Relations.parent.value,
236-
"type": MimeTypes.json,
237-
"href": base_url,
238-
},
211+
{"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url},
212+
{"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url},
239213
{
240214
"rel": Relations.self.value,
241215
"type": MimeTypes.json,
242216
"href": urljoin(base_url, "collections"),
243217
},
244218
]
245219

246-
if next_link:
220+
if next_token:
221+
next_link = PagingLinks(next=next_token, request=request).link_next()
247222
links.append(next_link)
248223

249-
return Collections(
250-
collections=[
251-
self.collection_serializer.db_to_stac(c["_source"], base_url=base_url)
252-
for c in hits
253-
],
254-
links=links,
255-
)
224+
return Collections(collections=collections, links=links)
256225

257226
async def get_collection(self, collection_id: str, **kwargs) -> Collection:
258227
"""Get a collection from the database by its id.
@@ -269,7 +238,9 @@ async def get_collection(self, collection_id: str, **kwargs) -> Collection:
269238
"""
270239
base_url = str(kwargs["request"].base_url)
271240
collection = await self.database.find_collection(collection_id=collection_id)
272-
return self.collection_serializer.db_to_stac(collection, base_url)
241+
return self.collection_serializer.db_to_stac(
242+
collection=collection, base_url=base_url
243+
)
273244

274245
async def item_collection(
275246
self,

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -291,33 +291,43 @@ class DatabaseLogic:
291291
"""CORE LOGIC"""
292292

293293
async def get_all_collections(
294-
self, token: Optional[str], limit: int
295-
) -> Iterable[Dict[str, Any]]:
296-
"""Retrieve a list of all collections from the database.
294+
self, token: Optional[str], limit: int, base_url: str
295+
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
296+
"""Retrieve a list of all collections from Elasticsearch, supporting pagination.
297297
298298
Args:
299-
token (Optional[str]): The token used to return the next set of results.
300-
limit (int): Number of results to return
299+
token (Optional[str]): The pagination token.
300+
limit (int): The number of results to return.
301301
302302
Returns:
303-
collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection.
304-
305-
Notes:
306-
The collections are retrieved from the Elasticsearch database using the `client.search` method,
307-
with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records.
308-
The result is a generator of dictionaries containing the source data for each collection.
303+
A tuple of (collections, next pagination token if any).
309304
"""
310305
search_after = None
311306
if token:
312-
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
313-
collections = await self.client.search(
307+
search_after = [token]
308+
309+
response = await self.client.search(
314310
index=COLLECTIONS_INDEX,
315-
search_after=search_after,
316-
size=limit,
317-
sort={"id": {"order": "asc"}},
311+
body={
312+
"sort": [{"id": {"order": "asc"}}],
313+
"size": limit,
314+
"search_after": search_after,
315+
},
318316
)
319-
hits = collections["hits"]["hits"]
320-
return hits
317+
318+
hits = response["hits"]["hits"]
319+
collections = [
320+
self.collection_serializer.db_to_stac(
321+
collection=hit["_source"], base_url=base_url
322+
)
323+
for hit in hits
324+
]
325+
326+
next_token = None
327+
if len(hits) == limit:
328+
next_token = hits[-1]["sort"][0]
329+
330+
return collections, next_token
321331

322332
async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
323333
"""Retrieve a single item from the database.

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -312,36 +312,49 @@ class DatabaseLogic:
312312
"""CORE LOGIC"""
313313

314314
async def get_all_collections(
315-
self,
316-
token: Optional[str],
317-
limit: int,
318-
) -> Iterable[Dict[str, Any]]:
319-
"""Retrieve a list of all collections from the database.
315+
self, token: Optional[str], limit: int, base_url: str
316+
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
317+
"""
318+
Retrieve a list of all collections from Opensearch, supporting pagination.
320319
321320
Args:
322-
token (Optional[str]): The token used to return the next set of results.
323-
limit (int): Number of results to return
321+
token (Optional[str]): The pagination token.
322+
limit (int): The number of results to return.
324323
325324
Returns:
326-
collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection.
327-
328-
Notes:
329-
The collections are retrieved from the Elasticsearch database using the `client.search` method,
330-
with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records.
331-
The result is a generator of dictionaries containing the source data for each collection.
325+
A tuple of (collections, next pagination token if any).
332326
"""
333-
search_body: Dict[str, Any] = {}
327+
search_body = {
328+
"sort": [{"id": {"order": "asc"}}],
329+
"size": limit,
330+
}
331+
332+
# Only add search_after to the query if token is not None and not empty
334333
if token:
335-
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
334+
search_after = [token]
336335
search_body["search_after"] = search_after
337336

338-
search_body["sort"] = {"id": {"order": "asc"}}
339-
340-
collections = await self.client.search(
341-
index=COLLECTIONS_INDEX, body=search_body, size=limit
337+
response = await self.client.search(
338+
index="collections",
339+
body=search_body,
342340
)
343-
hits = collections["hits"]["hits"]
344-
return hits
341+
342+
hits = response["hits"]["hits"]
343+
collections = [
344+
self.collection_serializer.db_to_stac(
345+
collection=hit["_source"], base_url=base_url
346+
)
347+
for hit in hits
348+
]
349+
350+
next_token = None
351+
if len(hits) == limit:
352+
# Ensure we have a valid sort value for next_token
353+
next_token_values = hits[-1].get("sort")
354+
if next_token_values:
355+
next_token = next_token_values[0]
356+
357+
return collections, next_token
345358

346359
async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
347360
"""Retrieve a single item from the database.

0 commit comments

Comments
 (0)