diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c5dc40c..3b7ec978 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- use index templates for Collection and Item indices [#208](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions/208) - Added API `title`, `version`, and `description` parameters from environment variables `STAC_FASTAPI_TITLE`, `STAC_FASTAPI_VERSION` and `STAC_FASTAPI_DESCRIPTION`, respectively. [#207](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/207) ### Changed diff --git a/README.md b/README.md index fd0ab0fe..a6d04609 100644 --- a/README.md +++ b/README.md @@ -116,10 +116,12 @@ make ingest ## Elasticsearch Mappings -Mappings apply to search index, not source. +Mappings apply to search index, not source. The mappings are stored in index templates on application startup. +These templates will be used implicitly when creating new Collection and Item indices. ## Managing Elasticsearch Indices +### Snapshots This section covers how to create a snapshot repository and then create and restore snapshots with this. @@ -219,3 +221,52 @@ curl -X "POST" "http://localhost:8080/collections" \ Voila! You have a copy of the collection now that has a resource URI (`/collections/my-collection-copy`) and can be correctly queried by collection name. + +### Reindexing +This section covers how to reindex documents stored in Elasticsearch/OpenSearch. +A reindex operation might be useful to apply changes to documents or to correct dynamically generated mappings. + +The index templates will make sure that manually created indices will also have the correct mappings and settings. + +In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase. + +```shell +curl -X "POST" "http://localhost:9200/_reindex" \ + -H 'Content-Type: application/json' \ + -d $'{ + "source": { + "index": "items_my-collection-000001" + }, + "dest": { + "index": "items_my-collection-000002" + }, + "script": { + "source": "ctx._source.id = ctx._source.id.toLowerCase()", + "lang": "painless" + } + }' +``` + +If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`. +```shell +curl -X "POST" "http://localhost:9200/_aliases" \ + -h 'Content-Type: application/json' \ + -d $'{ + "actions": [ + { + "remove": { + "index": "*", + "alias": "items_my-collection" + } + }, + { + "add": { + "index": "items_my-collection-000002", + "alias": "items_my-collection" + } + } + ] + }' +``` + +The modified Items with lowercase identifiers will now be visible to users accessing `my-collection` in the STAC API. \ No newline at end of file diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 6f204ed8..476c9fd3 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -16,6 +16,7 @@ from stac_fastapi.elasticsearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) from stac_fastapi.extensions.core import ( ContextExtension, @@ -78,6 +79,7 @@ @app.on_event("startup") async def _startup_event() -> None: + await create_index_templates() await create_collection_index() diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 87ca8916..49c44a60 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -171,9 +171,36 @@ def indices(collection_ids: Optional[List[str]]) -> str: return ",".join([index_by_collection_id(c) for c in collection_ids]) +async def create_index_templates() -> None: + """ + Create index templates for the Collection and Item indices. + + Returns: + None + + """ + client = AsyncElasticsearchSettings().create_client + await client.indices.put_template( + name=f"template_{COLLECTIONS_INDEX}", + body={ + "index_patterns": [f"{COLLECTIONS_INDEX}*"], + "mappings": ES_COLLECTIONS_MAPPINGS, + }, + ) + await client.indices.put_template( + name=f"template_{ITEMS_INDEX_PREFIX}", + body={ + "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], + "settings": ES_ITEMS_SETTINGS, + "mappings": ES_ITEMS_MAPPINGS, + }, + ) + await client.close() + + async def create_collection_index() -> None: """ - Create the index for a Collection. + Create the index for a Collection. The settings of the index template will be used implicitly. Returns: None @@ -184,14 +211,13 @@ async def create_collection_index() -> None: await client.options(ignore_status=400).indices.create( index=f"{COLLECTIONS_INDEX}-000001", aliases={COLLECTIONS_INDEX: {}}, - mappings=ES_COLLECTIONS_MAPPINGS, ) await client.close() async def create_item_index(collection_id: str): """ - Create the index for Items. + Create the index for Items. The settings of the index template will be used implicitly. Args: collection_id (str): Collection identifier. @@ -206,8 +232,6 @@ async def create_item_index(collection_id: str): await client.options(ignore_status=400).indices.create( index=f"{index_by_collection_id(collection_id)}-000001", aliases={index_name: {}}, - mappings=ES_ITEMS_MAPPINGS, - settings=ES_ITEMS_SETTINGS, ) await client.close() diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 79a95a90..2db1c4fd 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -25,6 +25,7 @@ from stac_fastapi.opensearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) settings = OpensearchSettings() @@ -78,6 +79,7 @@ @app.on_event("startup") async def _startup_event() -> None: + await create_index_templates() await create_collection_index() diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 0f4bf9cf..d174e295 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -173,9 +173,36 @@ def indices(collection_ids: Optional[List[str]]) -> str: return ",".join([index_by_collection_id(c) for c in collection_ids]) +async def create_index_templates() -> None: + """ + Create index templates for the Collection and Item indices. + + Returns: + None + + """ + client = AsyncSearchSettings().create_client + await client.indices.put_template( + name=f"template_{COLLECTIONS_INDEX}", + body={ + "index_patterns": [f"{COLLECTIONS_INDEX}*"], + "mappings": ES_COLLECTIONS_MAPPINGS, + }, + ) + await client.indices.put_template( + name=f"template_{ITEMS_INDEX_PREFIX}", + body={ + "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], + "settings": ES_ITEMS_SETTINGS, + "mappings": ES_ITEMS_MAPPINGS, + }, + ) + await client.close() + + async def create_collection_index() -> None: """ - Create the index for a Collection. + Create the index for a Collection. The settings of the index template will be used implicitly. Returns: None @@ -183,8 +210,7 @@ async def create_collection_index() -> None: """ client = AsyncSearchSettings().create_client - search_body = { - "mappings": ES_COLLECTIONS_MAPPINGS, + search_body: Dict[str, Any] = { "aliases": {COLLECTIONS_INDEX: {}}, } @@ -203,7 +229,7 @@ async def create_collection_index() -> None: async def create_item_index(collection_id: str): """ - Create the index for Items. + Create the index for Items. The settings of the index template will be used implicitly. Args: collection_id (str): Collection identifier. @@ -214,10 +240,8 @@ async def create_item_index(collection_id: str): """ client = AsyncSearchSettings().create_client index_name = index_by_collection_id(collection_id) - search_body = { + search_body: Dict[str, Any] = { "aliases": {index_name: {}}, - "mappings": ES_ITEMS_MAPPINGS, - "settings": ES_ITEMS_SETTINGS, } try: diff --git a/stac_fastapi/tests/conftest.py b/stac_fastapi/tests/conftest.py index 01160ee1..227509c9 100644 --- a/stac_fastapi/tests/conftest.py +++ b/stac_fastapi/tests/conftest.py @@ -23,6 +23,7 @@ from stac_fastapi.opensearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) else: from stac_fastapi.elasticsearch.config import ( @@ -32,6 +33,7 @@ from stac_fastapi.elasticsearch.database_logic import ( DatabaseLogic, create_collection_index, + create_index_templates, ) from stac_fastapi.extensions.core import ( # FieldsExtension, @@ -215,6 +217,7 @@ async def app(): @pytest_asyncio.fixture(scope="session") async def app_client(app): + await create_index_templates() await create_collection_index() async with AsyncClient(app=app, base_url="http://test-server") as c: diff --git a/stac_fastapi/tests/database/__init__.py b/stac_fastapi/tests/database/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py new file mode 100644 index 00000000..3f7fe5a8 --- /dev/null +++ b/stac_fastapi/tests/database/test_database.py @@ -0,0 +1,51 @@ +import os +import uuid +from copy import deepcopy + +import pytest + +from ..conftest import MockRequest, database + +if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.database_logic import ( + COLLECTIONS_INDEX, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + index_by_collection_id, + ) +else: + from stac_fastapi.elasticsearch.database_logic import ( + COLLECTIONS_INDEX, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + index_by_collection_id, + ) + + +@pytest.mark.asyncio +async def test_index_mapping_collections(ctx): + response = await database.client.indices.get_mapping(index=COLLECTIONS_INDEX) + if not isinstance(response, dict): + response = response.body + actual_mappings = next(iter(response.values()))["mappings"] + assert ( + actual_mappings["dynamic_templates"] + == ES_COLLECTIONS_MAPPINGS["dynamic_templates"] + ) + + +@pytest.mark.asyncio +async def test_index_mapping_items(ctx, txn_client): + collection = deepcopy(ctx.collection) + collection["id"] = str(uuid.uuid4()) + await txn_client.create_collection(collection, request=MockRequest) + response = await database.client.indices.get_mapping( + index=index_by_collection_id(collection["id"]) + ) + if not isinstance(response, dict): + response = response.body + actual_mappings = next(iter(response.values()))["mappings"] + assert ( + actual_mappings["dynamic_templates"] == ES_ITEMS_MAPPINGS["dynamic_templates"] + ) + await txn_client.delete_collection(collection["id"])