From c3669c1d136968026cc5a77bb5aea5448d84c511 Mon Sep 17 00:00:00 2001 From: Phil Varner Date: Thu, 12 May 2022 13:17:20 -0400 Subject: [PATCH] improve queryables, add documentation for copying indicies --- README.md | 102 ++++++++++++++++++ .../stac_fastapi/elasticsearch/core.py | 13 ++- .../elasticsearch/extensions/filter.py | 1 + 3 files changed, 113 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 55ff008b..5a9a5e96 100644 --- a/README.md +++ b/README.md @@ -72,3 +72,105 @@ make ingest ## Elasticsearch Mappings Mappings apply to search index, not source. + +## Managing Elasticsearch Indices + +This section covers how to create a snapshot repository and then create and restore snapshots with it. + +Create a snapshot repository. This puts the files in the `elasticsearch/snapshots` directory of this git repo clone, as +the elasticsearch.yml and docker-compose files create a mapping from that directory to +`/usr/share/elasticsearch/snapshots` within the Elasticsearch container and grant permission to use it. + +``` +curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "type": "fs", + "settings": { + "location": "/usr/share/elasticsearch/snapshots/my_fs_backup" + } +}' +``` + +The next step is to create a snapshot of one or more indices into this snapshot repository. This command creates +a snapshot named `my_snapshot_2` and waits for the action to be completed before returning. This can also be done +asynchronously, and queried for status. The `indices` parameter determines which indices are snapshotted, and +can include wildcards. 
+ +``` +curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "metadata": { + "taken_because": "dump of all items", + "taken_by": "pvarner" + }, + "include_global_state": false, + "ignore_unavailable": false, + "indices": "items_my-collection" +}' +``` + +To see the status of this snapshot: + +``` +curl http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2 +``` + +To see all the snapshots: + +``` +curl http://localhost:9200/_snapshot/my_fs_backup/_all +``` + +To restore a snapshot, run something similar to the following. This specific command will restore any indices that +match `items_*` and rename them so that the new index name will be suffixed with `-copy`. + +``` +curl -X "POST" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2/_restore?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "include_aliases": false, + "include_global_state": false, + "ignore_unavailable": true, + "rename_replacement": "items_$1-copy", + "indices": "items_*", + "rename_pattern": "items_(.+)" +}' + +``` + +Now the item documents have been restored into the new index (e.g., `items_my-collection-copy`), but the value of the +`collection` field in those documents is still the original value of `my-collection`. 
To update these to match the +new collection name, run the following Elasticsearch Update By Query command, substituting the new index name +into the URL and the new collection name into the script parameter: + +``` +curl -X "POST" "http://localhost:9200/items_my-collection-copy/_update_by_query" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "query": { + "match_all": {} +}, + "script": { + "lang": "painless", + "params": { + "collection": "my-collection-copy" + }, + "source": "ctx._source.collection = params.collection" + } +}' +``` + +Then, create a new collection through the API with the new name for each of the restored indices: + +``` +curl -X "POST" "http://localhost:8080/collections" \ + -H 'Content-Type: application/json' \ + -d $'{ + "id": "my-collection-copy" +}' +``` + +Voila! You now have a copy of the collection that has a resource URI (`/collections/my-collection-copy`) and can be +correctly queried by collection name. diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py index 8c8a4df4..2f6d3599 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py @@ -509,15 +509,15 @@ async def get_queryables( "properties": { "id": { "description": "ID", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/id", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", }, "collection": { "description": "Collection", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/collection", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", }, "geometry": { "description": "Geometry", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/geometry", + 
"$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", }, "datetime": { "description": "Acquisition Timestamp", @@ -542,6 +542,13 @@ async def get_queryables( "minimum": 0, "maximum": 100, }, + "nodata_pixel_percentage": { + "description": "No Data Pixel Percentage", + "title": "No Data Pixel Percentage", + "type": "number", + "minimum": 0, + "maximum": 100, + }, }, "additionalProperties": True, } diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py index 0a077703..30f1afcb 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py @@ -33,6 +33,7 @@ "updated": "properties.updated", "cloud_cover": "properties.eo:cloud_cover", "cloud_shadow_percentage": "properties.s2:cloud_shadow_percentage", + "nodata_pixel_percentage": "properties.s2:nodata_pixel_percentage", }