From 972f16046622242bd65ceba4d48ede6b139007f4 Mon Sep 17 00:00:00 2001 From: rhysrevans3 Date: Wed, 29 Jan 2025 13:02:05 +0000 Subject: [PATCH 1/4] Allow capitals in item index aliases. --- CHANGELOG.md | 2 + .../elasticsearch/database_logic.py | 34 +++++++++++------ .../stac_fastapi/opensearch/database_logic.py | 38 +++++++++++++------ 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e41a1462..581e4086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +- Update item index naming and aliasing to allow capitalisation of collection ids [#](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/) + ## [v3.2.2] - 2024-12-15 ### Changed diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c404b5e5..0f272218 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -156,7 +156,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -172,7 +185,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -231,11 +244,10 @@ async def create_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - index_name = index_by_collection_id(collection_id) await client.options(ignore_status=400).indices.create( index=f"{index_by_collection_id(collection_id)}-000001", - aliases={index_name: {}}, + aliases={index_alias_by_collection_id(collection_id): {}}, ) await client.close() @@ -248,7 +260,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncElasticsearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -288,7 +300,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -449,7 +461,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -808,7 +820,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -845,7 +857,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -871,7 +883,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), document=item, refresh=refresh, @@ -897,7 +909,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 63a42427..498c9c01 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -158,7 +158,20 @@ def index_by_collection_id(collection_id: str) -> str: Returns: str: The index name derived from the collection id. """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" + + +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" def indices(collection_ids: Optional[List[str]]) -> str: @@ -174,7 +187,7 @@ def indices(collection_ids: Optional[List[str]]) -> str: if collection_ids is None or collection_ids == []: return ITEM_INDICES else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) + return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) async def create_index_templates() -> None: @@ -243,13 +256,14 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - index_name = index_by_collection_id(collection_id) search_body: Dict[str, Any] = { - "aliases": {index_name: {}}, + "aliases": {index_alias_by_collection_id(collection_id): {}}, } try: - await client.indices.create(index=f"{index_name}-000001", body=search_body) + await client.indices.create( + index=f"{index_by_collection_id(collection_id)}-000001", body=search_body + ) except TransportError as e: if e.status_code == 400: pass # Ignore 400 status codes @@ -267,7 +281,7 @@ async def delete_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client - name = index_by_collection_id(collection_id) + name = index_alias_by_collection_id(collection_id) resolved = await client.indices.resolve_index(name=name) if "aliases" in resolved and resolved["aliases"]: [alias] = resolved["aliases"] @@ -307,7 +321,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]): """ return [ { - "_index": index_by_collection_id(collection_id), + "_index": index_alias_by_collection_id(collection_id), "_id": mk_item_id(item["id"], item["collection"]), "_source": item, } @@ -476,7 +490,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ try: item = await self.client.get( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ) except exceptions.NotFoundError: @@ -838,7 +852,7 @@ async def prep_create_item( await self.check_collection_exists(collection_id=item["collection"]) if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), + index=index_alias_by_collection_id(item["collection"]), id=mk_item_id(item["id"], item["collection"]), ): raise ConflictError( @@ -875,7 +889,7 @@ def sync_prep_create_item( raise NotFoundError(f"Collection {collection_id} does not exist") if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), ): raise ConflictError( @@ -901,7 +915,7 @@ async def create_item(self, item: Item, refresh: bool = False): item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), body=item, refresh=refresh, @@ -927,7 +941,7 @@ async def delete_item( """ try: await self.client.delete( - index=index_by_collection_id(collection_id), + index=index_alias_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) From 7f7bc91611867d84fc72197cc1f4e5163a5337a9 Mon Sep 17 00:00:00 2001 From: rhysrevans3 Date: Wed, 29 Jan 2025 13:09:23 +0000 Subject: [PATCH 2/4] Update changelog. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 581e4086..5d2b49ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] -- Update item index naming and aliasing to allow capitalisation of collection ids [#](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/) +- Update item index naming and aliasing to allow capitalisation of collection ids [#329](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/329) ## [v3.2.2] - 2024-12-15 From 1ea1741b25ec84e836cb3625dd5ffc9e2f9c7950 Mon Sep 17 00:00:00 2001 From: rhysrevans3 Date: Wed, 29 Jan 2025 13:11:04 +0000 Subject: [PATCH 3/4] Updating test db. --- stac_fastapi/tests/database/test_database.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py index 80acd82c..a5a01e60 100644 --- a/stac_fastapi/tests/database/test_database.py +++ b/stac_fastapi/tests/database/test_database.py @@ -11,14 +11,14 @@ COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) else: from stac_fastapi.elasticsearch.database_logic import ( COLLECTIONS_INDEX, ES_COLLECTIONS_MAPPINGS, ES_ITEMS_MAPPINGS, - index_by_collection_id, + index_alias_by_collection_id, ) @@ -42,7 +42,7 @@ async def test_index_mapping_items(txn_client, load_test_data): api.Collection(**collection), request=MockRequest ) response = await database.client.indices.get_mapping( - index=index_by_collection_id(collection["id"]) + index=index_alias_by_collection_id(collection["id"]) ) if not isinstance(response, dict): response = response.body From 8150b474c10ec03074e08ae80103191aa1f2f856 Mon Sep 17 00:00:00 2001 From: rhysrevans3 Date: Wed, 29 Jan 2025 13:21:24 +0000 Subject: [PATCH 4/4] Updating docs. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1783422c..15c744df 100644 --- a/README.md +++ b/README.md @@ -267,17 +267,17 @@ A reindex operation might be useful to apply changes to documents or to correct The index templates will make sure that manually created indices will also have the correct mappings and settings. -In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase. +In this example, we will make a copy of an existing Item index `items_my-collection-lower_my-collection-hex-000001` but change the Item identifier to be lowercase. ```shell curl -X "POST" "http://localhost:9200/_reindex" \ -H 'Content-Type: application/json' \ -d $'{ "source": { - "index": "items_my-collection-000001" + "index": "items_my-collection-lower_my-collection-hex-000001" }, "dest": { - "index": "items_my-collection-000002" + "index": "items_my-collection-lower_my-collection-hex-000002" }, "script": { "source": "ctx._source.id = ctx._source.id.toLowerCase()", @@ -286,7 +286,7 @@ curl -X "POST" "http://localhost:9200/_reindex" \ }' ``` -If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`. +If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-lower_my-collection-hex-000002`. ```shell curl -X "POST" "http://localhost:9200/_aliases" \ -h 'Content-Type: application/json' \ @@ -300,7 +300,7 @@ curl -X "POST" "http://localhost:9200/_aliases" \ }, { "add": { - "index": "items_my-collection-000002", + "index": "items_my-collection-lower_my-collection-hex-000002", "alias": "items_my-collection" } }