From bf2335cbbd3fc9b602b8a65f67046181a8931176 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 13 Feb 2024 11:37:23 +0800 Subject: [PATCH 01/25] move dockerfiles, add mongodb --- Makefile | 4 ++-- docker-compose.yml | 24 +++++++++++++++++-- .../Dockerfile.deploy.es | 0 .../Dockerfile.deploy.os | 0 .../Dockerfile.dev.es | 0 .../Dockerfile.dev.os | 0 6 files changed, 24 insertions(+), 4 deletions(-) rename Dockerfile.deploy.es => dockerfiles/Dockerfile.deploy.es (100%) rename Dockerfile.deploy.os => dockerfiles/Dockerfile.deploy.os (100%) rename Dockerfile.dev.es => dockerfiles/Dockerfile.dev.es (100%) rename Dockerfile.dev.os => dockerfiles/Dockerfile.dev.os (100%) diff --git a/Makefile b/Makefile index 545d2311..4dfb2aec 100644 --- a/Makefile +++ b/Makefile @@ -28,11 +28,11 @@ run_os = docker-compose \ .PHONY: image-deploy-es image-deploy-es: - docker build -f Dockerfile.dev.es -t stac-fastapi-elasticsearch:latest . + docker build -f dockerfiles/Dockerfile.dev.es -t stac-fastapi-elasticsearch:latest . .PHONY: image-deploy-os image-deploy-os: - docker build -f Dockerfile.dev.os -t stac-fastapi-opensearch:latest . + docker build -f dockerfiles/Dockerfile.dev.os -t stac-fastapi-opensearch:latest . .PHONY: run-deploy-locally run-deploy-locally: diff --git a/docker-compose.yml b/docker-compose.yml index 9d665bce..46e56f4c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,7 +7,7 @@ services: restart: always build: context: . - dockerfile: Dockerfile.dev.es + dockerfile: dockerfiles/Dockerfile.dev.es environment: - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -36,7 +36,7 @@ services: restart: always build: context: . - dockerfile: Dockerfile.dev.os + dockerfile: dockerfiles/Dockerfile.dev.os environment: - APP_HOST=0.0.0.0 - APP_PORT=8082 @@ -84,3 +84,23 @@ services: - ./opensearch/snapshots:/usr/share/opensearch/snapshots ports: - "9202:9202" + + mongo: + container_name: mongo-container + image: mongo:7.0.5 + hostname: mongo + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: example + ports: + - 27017:27017 + + mongo-express: + image: mongo-express + restart: always + ports: + - 8081:8081 + environment: + ME_CONFIG_MONGODB_ADMINUSERNAME: root + ME_CONFIG_MONGODB_ADMINPASSWORD: example + ME_CONFIG_MONGODB_URL: mongodb://root:example@mongo:27017/ \ No newline at end of file diff --git a/Dockerfile.deploy.es b/dockerfiles/Dockerfile.deploy.es similarity index 100% rename from Dockerfile.deploy.es rename to dockerfiles/Dockerfile.deploy.es diff --git a/Dockerfile.deploy.os b/dockerfiles/Dockerfile.deploy.os similarity index 100% rename from Dockerfile.deploy.os rename to dockerfiles/Dockerfile.deploy.os diff --git a/Dockerfile.dev.es b/dockerfiles/Dockerfile.dev.es similarity index 100% rename from Dockerfile.dev.es rename to dockerfiles/Dockerfile.dev.es diff --git a/Dockerfile.dev.os b/dockerfiles/Dockerfile.dev.os similarity index 100% rename from Dockerfile.dev.os rename to dockerfiles/Dockerfile.dev.os From 2be8df2c7e8a7109aed075aeb1b36150dcdc93d7 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 13 Feb 2024 11:52:47 +0800 Subject: [PATCH 02/25] add scratch code --- docker-compose.yml | 23 + dockerfiles/Dockerfile.deploy.mongo | 20 + dockerfiles/Dockerfile.dev.mongo | 19 + stac_fastapi/elasticsearch/setup.py | 2 +- stac_fastapi/mongo/README.md | 1 + stac_fastapi/mongo/pytest.ini | 4 + stac_fastapi/mongo/setup.cfg | 2 + stac_fastapi/mongo/setup.py | 55 ++ .../mongo/stac_fastapi/opensearch/__init__.py | 1 + .../mongo/stac_fastapi/opensearch/app.py | 109 +++ 
.../mongo/stac_fastapi/opensearch/config.py | 81 ++ .../stac_fastapi/opensearch/database_logic.py | 908 ++++++++++++++++++ .../mongo/stac_fastapi/opensearch/version.py | 2 + stac_fastapi/opensearch/setup.py | 2 +- 14 files changed, 1227 insertions(+), 2 deletions(-) create mode 100644 dockerfiles/Dockerfile.deploy.mongo create mode 100644 dockerfiles/Dockerfile.dev.mongo create mode 100644 stac_fastapi/mongo/README.md create mode 100644 stac_fastapi/mongo/pytest.ini create mode 100644 stac_fastapi/mongo/setup.cfg create mode 100644 stac_fastapi/mongo/setup.py create mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py create mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/app.py create mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/config.py create mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py create mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/version.py diff --git a/docker-compose.yml b/docker-compose.yml index 46e56f4c..d968af08 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -59,6 +59,29 @@ services: command: bash -c "./scripts/wait-for-it-es.sh os-container:9202 && python -m stac_fastapi.opensearch.app" + app-mongo: + container_name: stac-fastapi-mongo + image: stac-utils/stac-fastapi-mongo + restart: always + build: + context: . + dockerfile: dockerfiles/Dockerfile.dev.mongo + environment: + - APP_HOST=0.0.0.0 + - APP_PORT=8084 + - RELOAD=true + - ENVIRONMENT=local + - BACKEND=mongo + ports: + - "8084:8084" + volumes: + - ./stac_fastapi:/app/stac_fastapi + - ./scripts:/app/scripts + depends_on: + - mongo + command: + bash -c "./scripts/wait-for-it-es.sh mongo-container:27017 && python -m stac_fastapi.mongo.app" + elasticsearch: container_name: es-container image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.11.0} diff --git a/dockerfiles/Dockerfile.deploy.mongo b/dockerfiles/Dockerfile.deploy.mongo new file mode 100644 index 00000000..8215d0a7 --- /dev/null +++ b/dockerfiles/Dockerfile.deploy.mongo @@ -0,0 +1,20 @@ +FROM python:3.10-slim + +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get -y install gcc && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + +WORKDIR /app + +COPY . /app + +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir ./stac_fastapi/mongo[server] + +EXPOSE 8080 + +CMD ["uvicorn", "stac_fastapi.mongo.app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/dockerfiles/Dockerfile.dev.mongo b/dockerfiles/Dockerfile.dev.mongo new file mode 100644 index 00000000..df8e8494 --- /dev/null +++ b/dockerfiles/Dockerfile.dev.mongo @@ -0,0 +1,19 @@ +FROM python:3.10-slim + + +# update apt pkgs, and install build-essential for ciso8601 +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get install -y build-essential && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# update certs used by Requests +ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt + +WORKDIR /app + +COPY . 
/app + +RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/mongo[dev,server] diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py index 587c1aee..278cc356 100644 --- a/stac_fastapi/elasticsearch/setup.py +++ b/stac_fastapi/elasticsearch/setup.py @@ -6,7 +6,7 @@ desc = f.read() install_requires = [ - "stac-fastapi.core==0.1.0", + "stac-fastapi.core==2.0.0", "elasticsearch[async]==8.11.0", "elasticsearch-dsl==8.11.0", "uvicorn", diff --git a/stac_fastapi/mongo/README.md b/stac_fastapi/mongo/README.md new file mode 100644 index 00000000..ef8b06d4 --- /dev/null +++ b/stac_fastapi/mongo/README.md @@ -0,0 +1 @@ +# stac-fastapi-mongo \ No newline at end of file diff --git a/stac_fastapi/mongo/pytest.ini b/stac_fastapi/mongo/pytest.ini new file mode 100644 index 00000000..db0353ef --- /dev/null +++ b/stac_fastapi/mongo/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = tests +addopts = -sv +asyncio_mode = auto \ No newline at end of file diff --git a/stac_fastapi/mongo/setup.cfg b/stac_fastapi/mongo/setup.cfg new file mode 100644 index 00000000..9f0be4b7 --- /dev/null +++ b/stac_fastapi/mongo/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +version = attr: stac_fastapi.opensearch.version.__version__ diff --git a/stac_fastapi/mongo/setup.py b/stac_fastapi/mongo/setup.py new file mode 100644 index 00000000..8c99f078 --- /dev/null +++ b/stac_fastapi/mongo/setup.py @@ -0,0 +1,55 @@ +"""stac_fastapi: mongodb module.""" + +from setuptools import find_namespace_packages, setup + +with open("README.md") as f: + desc = f.read() + +install_requires = [ + "stac-fastapi.core==2.0.0", + "opensearch-py==2.4.2", + "opensearch-py[async]==2.4.2", + "uvicorn", + "starlette", +] + +extra_reqs = { + "dev": [ + "pytest", + "pytest-cov", + "pytest-asyncio", + "pre-commit", + "requests", + "ciso8601", + "httpx", + ], + "docs": ["mkdocs", "mkdocs-material", "pdocs"], + "server": ["uvicorn[standard]==0.19.0"], +} + +setup( + name="stac-fastapi.mongo", + description="Mongodb stac-fastapi backend.", + long_description=desc, + long_description_content_type="text/markdown", + python_requires=">=3.8", + classifiers=[ + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + ], + url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", + license="MIT", + packages=find_namespace_packages(), + zip_safe=False, + install_requires=install_requires, + extras_require=extra_reqs, + entry_points={ + "console_scripts": ["stac-fastapi-opensearch=stac_fastapi.opensearch.app:run"] + }, +) diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py b/stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py new file mode 100644 index 00000000..342b8919 --- /dev/null +++ b/stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py @@ -0,0 +1 @@ +"""opensearch submodule.""" diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/app.py b/stac_fastapi/mongo/stac_fastapi/opensearch/app.py new file mode 100644 index 00000000..ebb2921e --- /dev/null +++ b/stac_fastapi/mongo/stac_fastapi/opensearch/app.py @@ -0,0 +1,109 @@ +"""FastAPI application.""" + +from stac_fastapi.api.app import StacApi +from stac_fastapi.api.models import create_get_request_model, 
create_post_request_model +from stac_fastapi.core.core import ( + BulkTransactionsClient, + CoreClient, + EsAsyncBaseFiltersClient, + TransactionsClient, +) +from stac_fastapi.core.extensions import QueryExtension +from stac_fastapi.core.session import Session +from stac_fastapi.extensions.core import ( + ContextExtension, + FieldsExtension, + FilterExtension, + SortExtension, + TokenPaginationExtension, + TransactionExtension, +) +from stac_fastapi.extensions.third_party import BulkTransactionExtension +from stac_fastapi.opensearch.config import OpensearchSettings +from stac_fastapi.opensearch.database_logic import ( + DatabaseLogic, + create_collection_index, +) + +settings = OpensearchSettings() +session = Session.create_from_settings(settings) + +filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +filter_extension.conformance_classes.append( + "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" +) + +database_logic = DatabaseLogic() + +extensions = [ + TransactionExtension( + client=TransactionsClient( + database=database_logic, session=session, settings=settings + ), + settings=settings, + ), + BulkTransactionExtension( + client=BulkTransactionsClient( + database=database_logic, + session=session, + settings=settings, + ) + ), + FieldsExtension(), + QueryExtension(), + SortExtension(), + TokenPaginationExtension(), + ContextExtension(), + filter_extension, +] + +post_request_model = create_post_request_model(extensions) + +api = StacApi( + settings=settings, + extensions=extensions, + client=CoreClient( + database=database_logic, session=session, post_request_model=post_request_model + ), + search_get_request_model=create_get_request_model(extensions), + search_post_request_model=post_request_model, +) +app = api.app + + +@app.on_event("startup") +async def _startup_event() -> None: + await create_collection_index() + + +def run() -> None: + """Run app from command line using uvicorn if available.""" + try: + import uvicorn + + uvicorn.run( + "stac_fastapi.opensearch.app:app", + host=settings.app_host, + port=settings.app_port, + log_level="info", + reload=settings.reload, + ) + except ImportError: + raise RuntimeError("Uvicorn must be installed in order to use command") + + +if __name__ == "__main__": + run() + + +def create_handler(app): + """Create a handler to use with AWS Lambda if mangum available.""" + try: + from mangum import Mangum + + return Mangum(app) + except ImportError: + return None + + +handler = create_handler(app) diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/config.py b/stac_fastapi/mongo/stac_fastapi/opensearch/config.py new file mode 100644 index 00000000..a53859fa --- /dev/null +++ b/stac_fastapi/mongo/stac_fastapi/opensearch/config.py @@ -0,0 +1,81 @@ +"""API configuration.""" +import os +import ssl +from typing import Any, Dict, Set + +from opensearchpy import AsyncOpenSearch, OpenSearch + +from stac_fastapi.types.config import ApiSettings + + +def _es_config() -> Dict[str, Any]: + # Determine the scheme (http or https) + use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true" + scheme = "https" if use_ssl else "http" + + # Configure the hosts parameter with the correct scheme + hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"] + + # Initialize the configuration dictionary + config = { + "hosts": hosts, + "headers": {"accept": "application/json", "Content-Type": "application/json"}, + } + + # Explicitly exclude SSL settings when not using SSL + if not use_ssl: + return config + + # 
Include SSL settings if using https + config["ssl_version"] = ssl.TLSVersion.TLSv1_3 # type: ignore + config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false" # type: ignore + + # Include CA Certificates if verifying certs + if config["verify_certs"]: + config["ca_certs"] = os.getenv( + "CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt" + ) + + # Handle authentication + if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")): + config["http_auth"] = (u, p) + + if api_key := os.getenv("ES_API_KEY"): + if isinstance(config["headers"], dict): + headers = {**config["headers"], "x-api-key": api_key} + + else: + config["headers"] = {"x-api-key": api_key} + + config["headers"] = headers + + return config + + +_forbidden_fields: Set[str] = {"type"} + + +class OpensearchSettings(ApiSettings): + """API settings.""" + + # Fields which are defined by STAC but not included in the database model + forbidden_fields: Set[str] = _forbidden_fields + indexed_fields: Set[str] = {"datetime"} + + @property + def create_client(self): + """Create es client.""" + return OpenSearch(**_es_config()) + + +class AsyncOpensearchSettings(ApiSettings): + """API settings.""" + + # Fields which are defined by STAC but not included in the database model + forbidden_fields: Set[str] = _forbidden_fields + indexed_fields: Set[str] = {"datetime"} + + @property + def create_client(self): + """Create async elasticsearch client.""" + return AsyncOpenSearch(**_es_config()) diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py new file mode 100644 index 00000000..a946f82f --- /dev/null +++ b/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py @@ -0,0 +1,908 @@ +"""Database logic.""" +import asyncio +import logging +import os +from base64 import urlsafe_b64decode, urlsafe_b64encode +from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union + +import attr +from opensearchpy import exceptions, helpers +from opensearchpy.exceptions import TransportError +from opensearchpy.helpers.query import Q +from opensearchpy.helpers.search import Search + +from stac_fastapi.core import serializers +from stac_fastapi.core.extensions import filter +from stac_fastapi.core.utilities import bbox2polygon +from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, +) +from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings +from stac_fastapi.types.errors import ConflictError, NotFoundError +from stac_fastapi.types.stac import Collection, Item + +logger = logging.getLogger(__name__) + +NumType = Union[float, int] + +COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") +ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") +ES_INDEX_NAME_UNSUPPORTED_CHARS = { + "\\", + "/", + "*", + "?", + '"', + "<", + ">", + "|", + " ", + ",", + "#", + ":", +} + +ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" + +DEFAULT_SORT = { + "properties.datetime": {"order": "desc"}, + "id": {"order": "desc"}, + "collection": {"order": "desc"}, +} + +ES_ITEMS_SETTINGS = { + "index": { + "sort.field": list(DEFAULT_SORT.keys()), + "sort.order": [v["order"] for v in DEFAULT_SORT.values()], + } +} + +ES_MAPPINGS_DYNAMIC_TEMPLATES = [ + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + { + "descriptions": { + "match_mapping_type": "string", + "match": "description", + 
"mapping": {"type": "text"}, + } + }, + { + "titles": { + "match_mapping_type": "string", + "match": "title", + "mapping": {"type": "text"}, + } + }, + # Projection Extension https://github.com/stac-extensions/projection + {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, + { + "proj_projjson": { + "match": "proj:projjson", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "proj_centroid": { + "match": "proj:centroid", + "mapping": {"type": "geo_point"}, + } + }, + { + "proj_geometry": { + "match": "proj:geometry", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "no_index_href": { + "match": "href", + "mapping": {"type": "text", "index": False}, + } + }, + # Default all other strings not otherwise specified to keyword + {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, + {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, +] + +ES_ITEMS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "geometry": {"type": "geo_shape"}, + "assets": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "properties": { + "type": "object", + "properties": { + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + "datetime": {"type": "date"}, + "start_datetime": {"type": "date"}, + "end_datetime": {"type": "date"}, + "created": {"type": "date"}, + "updated": {"type": "date"}, + # Satellite Extension https://github.com/stac-extensions/sat + "sat:absolute_orbit": {"type": "integer"}, + "sat:relative_orbit": {"type": "integer"}, + }, + }, + }, +} + +ES_COLLECTIONS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "extent.spatial.bbox": {"type": "long"}, + "extent.temporal.interval": {"type": "date"}, + "providers": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "item_assets": {"type": "object", "enabled": False}, + }, +} + + +def index_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index name. + + Args: + collection_id (str): The collection id to translate into an index name. + + Returns: + str: The index name derived from the collection id. + """ + return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" + + +def indices(collection_ids: Optional[List[str]]) -> str: + """ + Get a comma-separated string of index names for a given list of collection ids. + + Args: + collection_ids: A list of collection ids. + + Returns: + A string of comma-separated index names. If `collection_ids` is None, returns the default indices. + """ + if collection_ids is None: + return ITEM_INDICES + else: + return ",".join([index_by_collection_id(c) for c in collection_ids]) + + +async def create_collection_index() -> None: + """ + Create the index for a Collection. 
+ + Returns: + None + + """ + client = AsyncSearchSettings().create_client + + search_body = { + "mappings": ES_COLLECTIONS_MAPPINGS, + "aliases": {COLLECTIONS_INDEX: {}}, + } + + index = f"{COLLECTIONS_INDEX}-000001" + + try: + await client.indices.create(index=index, body=search_body) + except TransportError as e: + if e.status_code == 400: + pass # Ignore 400 status codes + else: + raise e + + await client.close() + + +async def create_item_index(collection_id: str): + """ + Create the index for Items. + + Args: + collection_id (str): Collection identifier. + + Returns: + None + + """ + client = AsyncSearchSettings().create_client + index_name = index_by_collection_id(collection_id) + search_body = { + "aliases": {index_name: {}}, + "mappings": ES_ITEMS_MAPPINGS, + "settings": ES_ITEMS_SETTINGS, + } + + try: + await client.indices.create(index=f"{index_name}-000001", body=search_body) + except TransportError as e: + if e.status_code == 400: + pass # Ignore 400 status codes + else: + raise e + + await client.close() + + +async def delete_item_index(collection_id: str): + """Delete the index for items in a collection. + + Args: + collection_id (str): The ID of the collection whose items index will be deleted. + """ + client = AsyncSearchSettings().create_client + + name = index_by_collection_id(collection_id) + resolved = await client.indices.resolve_index(name=name) + if "aliases" in resolved and resolved["aliases"]: + [alias] = resolved["aliases"] + await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) + await client.indices.delete(index=alias["indices"]) + else: + await client.indices.delete(index=name) + await client.close() + + +def mk_item_id(item_id: str, collection_id: str): + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +def mk_actions(collection_id: str, processed_items: List[Item]): + """Create Elasticsearch bulk actions for a list of processed items. + + Args: + collection_id (str): The identifier for the collection the items belong to. + processed_items (List[Item]): The list of processed items to be bulk indexed. + + Returns: + List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, + each action being a dictionary with the following keys: + - `_index`: the index to store the document in. + - `_id`: the document's identifier. + - `_source`: the source of the document. 
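+
+    Example (illustrative, with a minimal two-field item):
+        mk_actions("test-collection", [{"id": "item-1", "collection": "test-collection"}])
+        # -> [{"_index": "items_test-collection",
+        #      "_id": "item-1|test-collection",
+        #      "_source": {"id": "item-1", "collection": "test-collection"}}]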
+ """ + return [ + { + "_index": index_by_collection_id(collection_id), + "_id": mk_item_id(item["id"], item["collection"]), + "_source": item, + } + for item in processed_items + ] + + +# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, +# So create our own Protocol for typing +# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] +class Geometry(Protocol): # noqa + type: str + coordinates: Any + + +@attr.s +class DatabaseLogic: + """Database logic.""" + + client = AsyncSearchSettings().create_client + sync_client = SyncSearchSettings().create_client + + item_serializer: Type[serializers.ItemSerializer] = attr.ib( + default=serializers.ItemSerializer + ) + collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( + default=serializers.CollectionSerializer + ) + + """CORE LOGIC""" + + async def get_all_collections( + self, + token: Optional[str], + limit: int, + ) -> Iterable[Dict[str, Any]]: + """Retrieve a list of all collections from the database. + + Args: + token (Optional[str]): The token used to return the next set of results. + limit (int): Number of results to return + + Returns: + collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection. + + Notes: + The collections are retrieved from the Elasticsearch database using the `client.search` method, + with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records. + The result is a generator of dictionaries containing the source data for each collection. + """ + search_body: Dict[str, Any] = {} + if token: + search_after = urlsafe_b64decode(token.encode()).decode().split(",") + search_body["search_after"] = search_after + + search_body["sort"] = {"id": {"order": "asc"}} + + collections = await self.client.search( + index=COLLECTIONS_INDEX, body=search_body, size=limit + ) + hits = collections["hits"]["hits"] + return hits + + async def get_one_item(self, collection_id: str, item_id: str) -> Dict: + """Retrieve a single item from the database. + + Args: + collection_id (str): The id of the Collection that the Item belongs to. + item_id (str): The id of the Item. + + Returns: + item (Dict): A dictionary containing the source data for the Item. + + Raises: + NotFoundError: If the specified Item does not exist in the Collection. + + Notes: + The Item is retrieved from the Elasticsearch database using the `client.get` method, + with the index for the Collection as the target index and the combined `mk_item_id` as the document id. + """ + try: + item = await self.client.get( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} does not exist in Collection {collection_id}" + ) + return item["_source"] + + @staticmethod + def make_search(): + """Database logic to create a Search instance.""" + return Search().sort(*DEFAULT_SORT) + + @staticmethod + def apply_ids_filter(search: Search, item_ids: List[str]): + """Database logic to search a list of STAC item ids.""" + return search.filter("terms", id=item_ids) + + @staticmethod + def apply_collections_filter(search: Search, collection_ids: List[str]): + """Database logic to search a list of STAC collection ids.""" + return search.filter("terms", collection=collection_ids) + + @staticmethod + def apply_datetime_filter(search: Search, datetime_search): + """Apply a filter to search based on datetime field. 
+ + Args: + search (Search): The search object to filter. + datetime_search (dict): The datetime filter criteria. + + Returns: + Search: The filtered search object. + """ + if "eq" in datetime_search: + search = search.filter( + "term", **{"properties__datetime": datetime_search["eq"]} + ) + else: + search = search.filter( + "range", properties__datetime={"lte": datetime_search["lte"]} + ) + search = search.filter( + "range", properties__datetime={"gte": datetime_search["gte"]} + ) + return search + + @staticmethod + def apply_bbox_filter(search: Search, bbox: List): + """Filter search results based on bounding box. + + Args: + search (Search): The search object to apply the filter to. + bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. + + Returns: + search (Search): The search object with the bounding box filter applied. + + Notes: + The bounding box is transformed into a polygon using the `bbox2polygon` function and + a geo_shape filter is added to the search object, set to intersect with the specified polygon. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": "polygon", + "coordinates": bbox2polygon(*bbox), + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_intersects_filter( + search: Search, + intersects: Geometry, + ): + """Filter search results based on intersecting geometry. + + Args: + search (Search): The search object to apply the filter to. + intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. + + Returns: + search (Search): The search object with the intersecting geometry filter applied. + + Notes: + A geo_shape filter is added to the search object, set to intersect with the specified geometry. + """ + return search.filter( + Q( + { + "geo_shape": { + "geometry": { + "shape": { + "type": intersects.type.lower(), + "coordinates": intersects.coordinates, + }, + "relation": "intersects", + } + } + } + ) + ) + + @staticmethod + def apply_stacql_filter(search: Search, op: str, field: str, value: float): + """Filter search results based on a comparison between a field and a value. + + Args: + search (Search): The search object to apply the filter to. + op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), + 'lt' (less than), or 'lte' (less than or equal). + field (str): The field to perform the comparison on. + value (float): The value to compare the field against. + + Returns: + search (Search): The search object with the specified filter applied. 
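+
+        Example (illustrative; the field name is assumed for the example):
+            apply_stacql_filter(search, "gte", "properties.eo:cloud_cover", 10)
+            # adds a {"range": {"properties.eo:cloud_cover": {"gte": 10}}} clause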
+ """ + if op != "eq": + key_filter = {field: {f"{op}": value}} + search = search.filter(Q("range", **key_filter)) + else: + search = search.filter("term", **{field: value}) + + return search + + @staticmethod + def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): + """Database logic to perform query for search endpoint.""" + if _filter is not None: + search = search.filter(filter.Clause.parse_obj(_filter).to_es()) + return search + + @staticmethod + def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: + """Database logic to sort search instance.""" + if sortby: + return {s.field: {"order": s.direction} for s in sortby} + else: + return None + + async def execute_search( + self, + search: Search, + limit: int, + token: Optional[str], + sort: Optional[Dict[str, Dict[str, str]]], + collection_ids: Optional[List[str]], + ignore_unavailable: bool = True, + ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: + """Execute a search query with limit and other optional parameters. + + Args: + search (Search): The search query to be executed. + limit (int): The maximum number of results to be returned. + token (Optional[str]): The token used to return the next set of results. + sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. + collection_ids (Optional[List[str]]): The collection ids to search. + ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. + + Returns: + Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: + - An iterable of search results, where each result is a dictionary with keys and values representing the + fields and values of each document. + - The total number of results (if the count could be computed), or None if the count could not be + computed. + - The token to be used to retrieve the next set of results, or None if there are no more results. + + Raises: + NotFoundError: If the collections specified in `collection_ids` do not exist. 
+ """ + search_body: Dict[str, Any] = {} + query = search.query.to_dict() if search.query else None + if query: + search_body["query"] = query + if token: + search_after = urlsafe_b64decode(token.encode()).decode().split(",") + search_body["search_after"] = search_after + search_body["sort"] = sort if sort else DEFAULT_SORT + + index_param = indices(collection_ids) + + search_task = asyncio.create_task( + self.client.search( + index=index_param, + ignore_unavailable=ignore_unavailable, + body=search_body, + size=limit, + ) + ) + + count_task = asyncio.create_task( + self.client.count( + index=index_param, + ignore_unavailable=ignore_unavailable, + body=search.to_dict(count=True), + ) + ) + + try: + es_response = await search_task + except exceptions.NotFoundError: + raise NotFoundError(f"Collections '{collection_ids}' do not exist") + + hits = es_response["hits"]["hits"] + items = (hit["_source"] for hit in hits) + + next_token = None + if hits and (sort_array := hits[-1].get("sort")): + next_token = urlsafe_b64encode( + ",".join([str(x) for x in sort_array]).encode() + ).decode() + + # (1) count should not block returning results, so don't wait for it to be done + # (2) don't cancel the task so that it will populate the ES cache for subsequent counts + maybe_count = None + if count_task.done(): + try: + maybe_count = count_task.result().get("count") + except Exception as e: + logger.error(f"Count task failed: {e}") + + return items, maybe_count, next_token + + """ TRANSACTION LOGIC """ + + async def check_collection_exists(self, collection_id: str): + """Database logic to check if a collection exists.""" + if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + async def prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Preps an item for insertion into the database. + + Args: + item (Item): The item to be prepped for insertion. + base_url (str): The base URL used to create the item's self URL. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The prepped item. + + Raises: + ConflictError: If the item already exists in the database. + + """ + await self.check_collection_exists(collection_id=item["collection"]) + + if not exist_ok and await self.client.exists( + index=index_by_collection_id(item["collection"]), + id=mk_item_id(item["id"], item["collection"]), + ): + raise ConflictError( + f"Item {item['id']} in collection {item['collection']} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + def sync_prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Prepare an item for insertion into the database. + + This method performs pre-insertion preparation on the given `item`, + such as checking if the collection the item belongs to exists, + and optionally verifying that an item with the same ID does not already exist in the database. + + Args: + item (Item): The item to be inserted into the database. + base_url (str): The base URL used for constructing URLs for the item. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The item after preparation is done. + + Raises: + NotFoundError: If the collection that the item belongs to does not exist in the database. + ConflictError: If an item with the same ID already exists in the collection. 
+ """ + item_id = item["id"] + collection_id = item["collection"] + if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise NotFoundError(f"Collection {collection_id} does not exist") + + if not exist_ok and self.sync_client.exists( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ): + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + return self.item_serializer.stac_to_db(item, base_url) + + async def create_item(self, item: Item, refresh: bool = False): + """Database logic for creating one item. + + Args: + item (Item): The item to be created. + refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. + + Raises: + ConflictError: If the item already exists in the database. + + Returns: + None + """ + # todo: check if collection exists, but cache + item_id = item["id"] + collection_id = item["collection"] + es_resp = await self.client.index( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + body=item, + refresh=refresh, + ) + + if (meta := es_resp.get("meta")) and meta.get("status") == 409: + raise ConflictError( + f"Item {item_id} in collection {collection_id} already exists" + ) + + async def delete_item( + self, item_id: str, collection_id: str, refresh: bool = False + ): + """Delete a single item from the database. + + Args: + item_id (str): The id of the Item to be deleted. + collection_id (str): The id of the Collection that the Item belongs to. + refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. + + Raises: + NotFoundError: If the Item does not exist in the database. + """ + try: + await self.client.delete( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + refresh=refresh, + ) + except exceptions.NotFoundError: + raise NotFoundError( + f"Item {item_id} in collection {collection_id} not found" + ) + + async def create_collection(self, collection: Collection, refresh: bool = False): + """Create a single collection in the database. + + Args: + collection (Collection): The Collection object to be created. + refresh (bool, optional): Whether to refresh the index after the creation. Default is False. + + Raises: + ConflictError: If a Collection with the same id already exists in the database. + + Notes: + A new index is created for the items in the Collection using the `create_item_index` function. + """ + collection_id = collection["id"] + + if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): + raise ConflictError(f"Collection {collection_id} already exists") + + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + body=collection, + refresh=refresh, + ) + + await create_item_index(collection_id) + + async def find_collection(self, collection_id: str) -> Collection: + """Find and return a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be found. + + Returns: + Collection: The found collection, represented as a `Collection` object. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function searches for a collection in the database using the specified `collection_id` and returns the found + collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised. 
+ """ + try: + collection = await self.client.get( + index=COLLECTIONS_INDEX, id=collection_id + ) + except exceptions.NotFoundError: + raise NotFoundError(f"Collection {collection_id} not found") + + return collection["_source"] + + async def update_collection( + self, collection_id: str, collection: Collection, refresh: bool = False + ): + """Update a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be updated. + collection (Collection): The Collection object to be used for the update. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not + found in the database. + + Notes: + This function updates the collection in the database using the specified + `collection_id` and with the collection specified in the `Collection` object. + If the collection is not found, a `NotFoundError` is raised. + """ + await self.find_collection(collection_id=collection_id) + + if collection_id != collection["id"]: + await self.create_collection(collection, refresh=refresh) + + await self.client.reindex( + body={ + "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, + "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, + "script": { + "lang": "painless", + "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", + }, + }, + wait_for_completion=True, + refresh=refresh, + ) + + await self.delete_collection(collection_id) + + else: + await self.client.index( + index=COLLECTIONS_INDEX, + id=collection_id, + body=collection, + refresh=refresh, + ) + + async def delete_collection(self, collection_id: str, refresh: bool = False): + """Delete a collection from the database. + + Parameters: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be deleted. + refresh (bool): Whether to refresh the index after the deletion (default: False). + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + + Notes: + This function first verifies that the collection with the specified `collection_id` exists in the database, and then + deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this + function also calls `delete_item_index` to delete the index for the items in the collection. + """ + await self.find_collection(collection_id=collection_id) + await self.client.delete( + index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh + ) + await delete_item_index(collection_id) + + async def bulk_async( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database asynchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The + `mk_actions` function is called to generate a list of actions for the bulk insert. 
If `refresh` is set to True, the
+            index is refreshed after the bulk insert. The function does not return any value.
+        """
+        await helpers.async_bulk(
+            self.client,
+            mk_actions(collection_id, processed_items),
+            refresh=refresh,
+            raise_on_error=False,
+        )
+
+    def bulk_sync(
+        self, collection_id: str, processed_items: List[Item], refresh: bool = False
+    ) -> None:
+        """Perform a bulk insert of items into the database synchronously.
+
+        Args:
+            self: The instance of the object calling this function.
+            collection_id (str): The ID of the collection to which the items belong.
+            processed_items (List[Item]): A list of `Item` objects to be inserted into the database.
+            refresh (bool): Whether to refresh the index after the bulk insert (default: False).
+
+        Notes:
+            This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The
+            insert is performed synchronously and blocking, meaning that the function does not return until the insert has
+            completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to
+            True, the index is refreshed after the bulk insert. The function does not return any value.
+        """
+        helpers.bulk(
+            self.sync_client,
+            mk_actions(collection_id, processed_items),
+            refresh=refresh,
+            raise_on_error=False,
+        )
+
+    # DANGER
+    async def delete_items(self) -> None:
+        """Danger! This is only for tests."""
+        await self.client.delete_by_query(
+            index=ITEM_INDICES,
+            body={"query": {"match_all": {}}},
+            wait_for_completion=True,
+        )
+
+    # DANGER
+    async def delete_collections(self) -> None:
+        """Danger! This is only for tests."""
+        await self.client.delete_by_query(
+            index=COLLECTIONS_INDEX,
+            body={"query": {"match_all": {}}},
+            wait_for_completion=True,
+        )
diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/version.py b/stac_fastapi/mongo/stac_fastapi/opensearch/version.py
new file mode 100644
index 00000000..6b648e2b
--- /dev/null
+++ b/stac_fastapi/mongo/stac_fastapi/opensearch/version.py
@@ -0,0 +1,2 @@
+"""library version."""
+__version__ = "2.0.0"
diff --git a/stac_fastapi/opensearch/setup.py b/stac_fastapi/opensearch/setup.py
index 9811c2ad..224e733b 100644
--- a/stac_fastapi/opensearch/setup.py
+++ b/stac_fastapi/opensearch/setup.py
@@ -6,7 +6,7 @@
     desc = f.read()
 
 install_requires = [
-    "stac-fastapi.core==0.1.0",
+    "stac-fastapi.core==2.0.0",
     "opensearch-py==2.4.2",
     "opensearch-py[async]==2.4.2",
     "uvicorn",

From e2bedb3197ab89374f62dd646d6e5f260673d4b7 Mon Sep 17 00:00:00 2001
From: jonhealy1
Date: Tue, 13 Feb 2024 12:17:11 +0800
Subject: [PATCH 03/25] add pymongo, motor libraries

---
 stac_fastapi/mongo/setup.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/stac_fastapi/mongo/setup.py b/stac_fastapi/mongo/setup.py
index 8c99f078..5be4ad41 100644
--- a/stac_fastapi/mongo/setup.py
+++ b/stac_fastapi/mongo/setup.py
@@ -7,9 +7,8 @@
 
 install_requires = [
     "stac-fastapi.core==2.0.0",
-    "opensearch-py==2.4.2",
-    "opensearch-py[async]==2.4.2",
-    "uvicorn",
+    "motor==3.3.2",
+    "pymongo==4.6.1", "uvicorn",
     "starlette",
 ]
 
@@ -49,7 +48,5 @@
     zip_safe=False,
     install_requires=install_requires,
     extras_require=extra_reqs,
-    entry_points={
-        "console_scripts": ["stac-fastapi-opensearch=stac_fastapi.opensearch.app:run"]
-    },
+    entry_points={"console_scripts": ["stac-fastapi-mongo=stac_fastapi.mongo.app:run"]},
 )

From 3a970213d1f2cf72ad005ab2934e2931b01c89ad Mon Sep 17 00:00:00 2001
From: jonhealy1
Date: Tue, 13 Feb 2024 12:19:54 +0800
Subject: [PATCH 
04/25] config scratch

---
 docker-compose.yml                            | 14 +--
 .../mongo/stac_fastapi/opensearch/config.py   | 94 ++++++-----------
 2 files changed, 46 insertions(+), 62 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index d968af08..e170f4a0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -113,17 +113,17 @@
     image: mongo:7.0.5
     hostname: mongo
     environment:
-      MONGO_INITDB_ROOT_USERNAME: root
-      MONGO_INITDB_ROOT_PASSWORD: example
+      - MONGO_INITDB_ROOT_USERNAME=root
+      - MONGO_INITDB_ROOT_PASSWORD=example
     ports:
-      - 27017:27017
+      - "27017:27017"
 
   mongo-express:
     image: mongo-express
     restart: always
     ports:
-      - 8081:8081
+      - "8081:8081"
     environment:
-      ME_CONFIG_MONGODB_ADMINUSERNAME: root
-      ME_CONFIG_MONGODB_ADMINPASSWORD: example
-      ME_CONFIG_MONGODB_URL: mongodb://root:example@mongo:27017/
\ No newline at end of file
+      - ME_CONFIG_MONGODB_ADMINUSERNAME=root
+      - ME_CONFIG_MONGODB_ADMINPASSWORD=example
+      - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/
\ No newline at end of file
diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/config.py b/stac_fastapi/mongo/stac_fastapi/opensearch/config.py
index a53859fa..e9e12a1a 100644
--- a/stac_fastapi/mongo/stac_fastapi/opensearch/config.py
+++ b/stac_fastapi/mongo/stac_fastapi/opensearch/config.py
@@ -1,81 +1,65 @@
 """API configuration."""
 import os
-import ssl
-from typing import Any, Dict, Set
+from typing import Set
 
-from opensearchpy import AsyncOpenSearch, OpenSearch
+from motor.motor_asyncio import AsyncIOMotorClient
+from pymongo import MongoClient
 
 from stac_fastapi.types.config import ApiSettings
 
 
-def _es_config() -> Dict[str, Any]:
-    # Determine the scheme (http or https)
-    use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true"
-    scheme = "https" if use_ssl else "http"
-
-    # Configure the hosts parameter with the correct scheme
-    hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"]
-
-    # Initialize the configuration dictionary
-    config = {
-        "hosts": hosts,
-        "headers": {"accept": "application/json", "Content-Type": "application/json"},
-    }
-
-    # Explicitly exclude SSL settings when not using SSL
-    if not use_ssl:
-        return config
-
-    # Include SSL settings if using https
-    config["ssl_version"] = ssl.TLSVersion.TLSv1_3  # type: ignore
-    config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false"  # type: ignore
-
-    # Include CA Certificates if verifying certs
-    if config["verify_certs"]:
-        config["ca_certs"] = os.getenv(
-            "CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"
-        )
-
-    # Handle authentication
-    if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")):
-        config["http_auth"] = (u, p)
-
-    if api_key := os.getenv("ES_API_KEY"):
-        if isinstance(config["headers"], dict):
-            headers = {**config["headers"], "x-api-key": api_key}
-
-        else:
-            config["headers"] = {"x-api-key": api_key}
-
-        config["headers"] = headers
-
-    return config
+def _mongodb_uri() -> str:
+    # MongoDB connection URI construction
+    user = os.getenv("MONGO_USER")
+    password = os.getenv("MONGO_PASS")
+    host = os.getenv("MONGO_HOST", "localhost")
+    port = os.getenv("MONGO_PORT", "27017")
+    database = os.getenv(
+        "MONGO_DB", "admin"
+    )  # Default to admin database for authentication
+    use_ssl = os.getenv("MONGO_USE_SSL", "false").lower() == "true"
+    ssl_cert_reqs = (
+        "CERT_NONE"
+        if os.getenv("MONGO_VERIFY_CERTS", "false").lower() == "false"
+        else "CERT_REQUIRED"
+    )
+
+    # Adjust URI based on whether using SRV record or not
+    if "mongodb+srv" in 
os.getenv("MONGO_CONNECTION_STRING", ""): + # SRV connection string format does not use port + uri = f"mongodb+srv://{user}:{password}@{host}/{database}?retryWrites=true&w=majority" + else: + # Standard connection string format with port + uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?retryWrites=true" + + if use_ssl: + uri += f"&ssl=true&ssl_cert_reqs={ssl_cert_reqs}" + + return uri _forbidden_fields: Set[str] = {"type"} -class OpensearchSettings(ApiSettings): +class MongoDBSettings(ApiSettings): """API settings.""" - # Fields which are defined by STAC but not included in the database model forbidden_fields: Set[str] = _forbidden_fields indexed_fields: Set[str] = {"datetime"} @property - def create_client(self): - """Create es client.""" - return OpenSearch(**_es_config()) + def create_client(self) -> MongoClient: + """Create MongoDB client.""" + return MongoClient(_mongodb_uri()) -class AsyncOpensearchSettings(ApiSettings): +class AsyncMongoDBSettings(ApiSettings): """API settings.""" - # Fields which are defined by STAC but not included in the database model forbidden_fields: Set[str] = _forbidden_fields indexed_fields: Set[str] = {"datetime"} @property - def create_client(self): - """Create async elasticsearch client.""" - return AsyncOpenSearch(**_es_config()) + def create_client(self) -> AsyncIOMotorClient: + """Create async MongoDB client.""" + return AsyncIOMotorClient(_mongodb_uri()) From 4ac9184f59f75599d71694caf8cdca084812fa7d Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 20 Feb 2024 00:00:18 +0800 Subject: [PATCH 05/25] mongo db scratch --- docker-compose.yml | 3 + .../{opensearch => mongo}/__init__.py | 0 .../stac_fastapi/{opensearch => mongo}/app.py | 9 +- .../{opensearch => mongo}/config.py | 0 .../stac_fastapi/mongo/database_logic.py | 984 ++++++++++++++++++ .../{opensearch => mongo}/version.py | 0 .../stac_fastapi/opensearch/database_logic.py | 908 ---------------- 7 files changed, 990 insertions(+), 914 deletions(-) rename stac_fastapi/mongo/stac_fastapi/{opensearch => mongo}/__init__.py (100%) rename stac_fastapi/mongo/stac_fastapi/{opensearch => mongo}/app.py (93%) rename stac_fastapi/mongo/stac_fastapi/{opensearch => mongo}/config.py (100%) create mode 100644 stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py rename stac_fastapi/mongo/stac_fastapi/{opensearch => mongo}/version.py (100%) delete mode 100644 stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py diff --git a/docker-compose.yml b/docker-compose.yml index e170f4a0..be90928c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -72,6 +72,9 @@ services: - RELOAD=true - ENVIRONMENT=local - BACKEND=mongo + - MONGO_DB=stac + - MONGO_USER=root + - MONGO_PASS=example ports: - "8084:8084" volumes: diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py b/stac_fastapi/mongo/stac_fastapi/mongo/__init__.py similarity index 100% rename from stac_fastapi/mongo/stac_fastapi/opensearch/__init__.py rename to stac_fastapi/mongo/stac_fastapi/mongo/__init__.py diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/app.py b/stac_fastapi/mongo/stac_fastapi/mongo/app.py similarity index 93% rename from stac_fastapi/mongo/stac_fastapi/opensearch/app.py rename to stac_fastapi/mongo/stac_fastapi/mongo/app.py index ebb2921e..0d845c50 100644 --- a/stac_fastapi/mongo/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/app.py @@ -19,13 +19,10 @@ TransactionExtension, ) from stac_fastapi.extensions.third_party import BulkTransactionExtension -from 
stac_fastapi.opensearch.config import OpensearchSettings
-from stac_fastapi.opensearch.database_logic import (
-    DatabaseLogic,
-    create_collection_index,
-)
+from stac_fastapi.mongo.config import AsyncMongoDBSettings
+from stac_fastapi.mongo.database_logic import DatabaseLogic, create_collection_index
 
-settings = OpensearchSettings()
+settings = AsyncMongoDBSettings()
 session = Session.create_from_settings(settings)
 
 filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient())
diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/config.py b/stac_fastapi/mongo/stac_fastapi/mongo/config.py
similarity index 100%
rename from stac_fastapi/mongo/stac_fastapi/opensearch/config.py
rename to stac_fastapi/mongo/stac_fastapi/mongo/config.py
diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py
new file mode 100644
index 00000000..6732b43d
--- /dev/null
+++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py
@@ -0,0 +1,984 @@
+"""Database logic."""
+import base64
+import json
+import logging
+import os
+from base64 import urlsafe_b64decode
+from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union
+
+import attr
+from bson import ObjectId
+from pymongo.errors import (
+    BulkWriteError,
+    CollectionInvalid,
+    DuplicateKeyError,
+    PyMongoError,
+)
+
+from stac_fastapi.core import serializers
+from stac_fastapi.core.extensions import filter
+from stac_fastapi.core.utilities import bbox2polygon
+from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSearchSettings
+from stac_fastapi.mongo.config import MongoDBSettings as SyncSearchSettings
+from stac_fastapi.types.errors import ConflictError, NotFoundError
+from stac_fastapi.types.stac import Collection, Item
+
+logger = logging.getLogger(__name__)
+
+NumType = Union[float, int]
+
+COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections")
+ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_")
+DATABASE = os.getenv("MONGO_DB", "admin")
+
+
+def index_by_collection_id(collection_id: str) -> str:
+    """
+    Translate a collection id into a MongoDB collection name for its items.
+
+    Args:
+        collection_id (str): The collection id to translate into a collection name.
+
+    Returns:
+        str: The items collection name derived from the collection id.
+    """
+    unsupported_chars = set('/\\ ."*<>:|?$')
+    sanitized = "".join(c for c in collection_id if c not in unsupported_chars)
+    return f"{ITEMS_INDEX_PREFIX}{sanitized.lower()}"
+
+
+def indices(collection_ids: Optional[List[str]]) -> str:
+    """
+    Get a comma-separated string of collection names for a given list of collection ids.
+
+    Args:
+        collection_ids: A list of collection ids.
+
+    Returns:
+        A string of comma-separated collection names. If `collection_ids` is None, returns the default collections index.
+    """
+    if collection_ids is None:
+        return COLLECTIONS_INDEX
+    else:
+        return ",".join([index_by_collection_id(c) for c in collection_ids])
+
+
+async def create_collection_index():
+    """
+    Ensure indexes for the collections collection in MongoDB using the asynchronous client.
+
+    Returns:
+        None
+    """
+    client = AsyncSearchSettings().create_client
+    db = client[DATABASE]
+
+    try:
+        await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True)
+        print("Index created successfully.")
+    except Exception as e:
+        # Handle exceptions, which could be due to existing index conflicts, etc.
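+        # (e.g. pymongo raises OperationFailure with code IndexOptionsConflict
+        # when an index with the same name but different options already exists)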
+ print(f"An error occurred while creating the index: {e}") + finally: + await client.close() + + +async def create_item_index(collection_id: str): + """ + Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. + + Args: + collection_id (str): Collection identifier used to derive the MongoDB collection name for items. + + Returns: + None + """ + client = AsyncSearchSettings.create_client() + db = client[DATABASE] + + # Derive the collection name for items based on the collection_id + collection_name = index_by_collection_id(collection_id) + + try: + await db[collection_name].create_index([("properties.datetime", -1)]) + await db[collection_name].create_index([("id", 1)], unique=True) + await db[collection_name].create_index([("geometry", "2dsphere")]) + print(f"Indexes created successfully for collection: {collection_name}.") + except Exception as e: + # Handle exceptions, which could be due to existing index conflicts, etc. + print( + f"An error occurred while creating indexes for collection {collection_name}: {e}" + ) + finally: + await client.close() + + +async def delete_item_index(collection_id: str): + """ + Drop the MongoDB collection corresponding to the specified collection ID. + + This operation is the MongoDB equivalent of deleting an Elasticsearch index, removing both the data and + the structure for the specified collection's items. + + Args: + collection_id (str): The ID of the collection whose associated MongoDB collection will be dropped. + """ + client = AsyncSearchSettings.create_client() + db = client[DATABASE] + + # Derive the MongoDB collection name using the collection ID + collection_name = index_by_collection_id(collection_id) + + try: + # Drop the collection, removing both its data and structure + await db[collection_name].drop() + logger.info(f"Collection '{collection_name}' successfully dropped.") + except Exception as e: + logger.error(f"Error dropping collection '{collection_name}': {e}") + finally: + await client.close() + + +def mk_item_id(item_id: str, collection_id: str): + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +class Geometry(Protocol): # noqa + type: str + coordinates: Any + + +class MongoSearchAdapter: + """ + Adapter class to manage search filters and sorting for MongoDB queries. + + Attributes: + filters (list): A list of filter conditions to be applied to the MongoDB query. + sort (list): A list of tuples specifying field names and their corresponding sort directions + for MongoDB sorting. + + Methods: + add_filter(filter_condition): Adds a new filter condition to the filters list. + set_sort(sort_conditions): Sets the sorting criteria based on a dictionary of field names + and sort directions. + """ + + def __init__(self): + """ + Initialize the MongoSearchAdapter with default sorting criteria. + + The default sort order is by 'properties.datetime' in descending order, followed by 'id' in descending order, + and finally by 'collection' in descending order. This matches typical STAC item queries where the most recent items + are retrieved first. 
+ """ + self.filters = [] + # MongoDB uses a list of tuples for sorting: [('field1', direction), ('field2', direction)] + # Convert the DEFAULT_SORT dict to this format, considering MongoDB's sorting capabilities + self.sort = [("properties.datetime", -1), ("id", -1), ("collection", -1)] + + def add_filter(self, filter_condition): + """ + Add a filter condition to the query. + + This method appends a new filter condition to the list of existing filters. Each filter condition + should be a dictionary representing a MongoDB query condition. + + Args: + filter_condition (dict): A dictionary representing a MongoDB filter condition. + """ + self.filters.append(filter_condition) + + def set_sort(self, sort_conditions): + """ + Set the sorting criteria for the query based on provided conditions. + + This method translates a dictionary of field names and sort directions (asc or desc) into MongoDB's + format for sorting queries. It overwrites any existing sort criteria with the new criteria provided. + + Args: + sort_conditions (dict): A dictionary where keys are field names and values are dictionaries + indicating sort direction ('asc' for ascending or 'desc' for descending). + """ + self.sort = [] + for field, details in sort_conditions.items(): + direction = 1 if details["order"] == "asc" else -1 + self.sort.append((field, direction)) + + +@attr.s +class DatabaseLogic: + """Database logic.""" + + client = AsyncSearchSettings().create_client + sync_client = SyncSearchSettings().create_client + + item_serializer: Type[serializers.ItemSerializer] = attr.ib( + default=serializers.ItemSerializer + ) + collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( + default=serializers.CollectionSerializer + ) + + def __init__(self): + """Init.""" + self.db_name = os.getenv("MONGO_DB", "default_db_name") + + """CORE LOGIC""" + + async def get_all_collections( + self, + token: Optional[str], + limit: int, + ) -> Iterable[Dict[str, Any]]: + """Retrieve a list of all collections from the database. + + Args: + token (Optional[str]): The token used to return the next set of results. + limit (int): Number of results to return + + Returns: + collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection. + + Notes: + The collections are retrieved from the Elasticsearch database using the `client.search` method, + with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records. + The result is a generator of dictionaries containing the source data for each collection. + """ + db = self.client[self.db_name] + collections_collection = db[COLLECTIONS_INDEX] + + query: Dict[str, Any] = {} + + if token: + # Assuming token is the last seen item ID; adjust based on your pagination strategy + last_seen_id = json.loads(urlsafe_b64decode(token.encode()).decode()) + query = {"id": {"$gt": last_seen_id}} + + cursor = collections_collection.find(query).sort("id", 1).limit(limit) + collections = [] + async for collection in cursor: + collections.append(collection) + + return collections + + async def get_one_item(self, collection_id: str, item_id: str) -> Dict: + """Retrieve a single item from the database. + + Args: + collection_id (str): The id of the Collection that the Item belongs to. + item_id (str): The id of the Item. + + Returns: + item (Dict): A dictionary containing the source data for the Item. + + Raises: + NotFoundError: If the specified Item does not exist in the Collection. 
+ + """ + db = self.client[self.db_name] + collection_name = index_by_collection_id(collection_id) + + try: + # Attempt to find the item in the specified collection + item = await db[collection_name].find_one({"id": item_id}) + if not item: + # If the item is not found, raise NotFoundError + raise NotFoundError( + f"Item {item_id} does not exist in Collection {collection_id}" + ) + return item + except Exception as e: + # Log and re-raise any exceptions encountered during the operation + logger.error( + f"An error occurred while retrieving item {item_id} from collection {collection_id}: {e}" + ) + raise + + @staticmethod + def make_search(): + """Database logic to create a Search instance.""" + # return Search().sort(*DEFAULT_SORT) + return MongoSearchAdapter() + + @staticmethod + def apply_ids_filter(search: MongoSearchAdapter, item_ids: List[str]): + """Database logic to search a list of STAC item ids.""" + search.add_filter({"_id": {"$in": item_ids}}) + return search + + @staticmethod + def apply_collections_filter(search: MongoSearchAdapter, collection_ids: List[str]): + """Database logic to search a list of STAC collection ids.""" + search.add_filter({"collection": {"$in": collection_ids}}) + return search + + @staticmethod + def apply_datetime_filter(search: MongoSearchAdapter, datetime_search): + """Apply a filter to search based on datetime field. + + Args: + search (Search): The search object to filter. + datetime_search (dict): The datetime filter criteria. + + Returns: + Search: The filtered search object. + """ + if "eq" in datetime_search: + search.add_filter({"properties.datetime": datetime_search["eq"]}) + else: + if "gte" in datetime_search: + search.add_filter( + {"properties.datetime": {"$gte": datetime_search["gte"]}} + ) + if "lte" in datetime_search: + search.add_filter( + {"properties.datetime": {"$lte": datetime_search["lte"]}} + ) + return search + + @staticmethod + def apply_bbox_filter(search: MongoSearchAdapter, bbox: List): + """Filter search results based on bounding box. + + Args: + search (Search): The search object to apply the filter to. + bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. + + Returns: + search (Search): The search object with the bounding box filter applied. + + Notes: + The bounding box is transformed into a polygon using the `bbox2polygon` function and + a geo_shape filter is added to the search object, set to intersect with the specified polygon. + """ + geojson_polygon = {"type": "Polygon", "coordinates": bbox2polygon(*bbox)} + return search.add_filter( + { + "geometry": { + "$geoIntersects": { + "$geometry": geojson_polygon, + } + } + } + ) + + @staticmethod + def apply_intersects_filter( + search: MongoSearchAdapter, + intersects: Geometry, + ): + """Filter search results based on intersecting geometry. + + Args: + search (Search): The search object to apply the filter to. + intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. + + Returns: + search (Search): The search object with the intersecting geometry filter applied. + + Notes: + A geo_shape filter is added to the search object, set to intersect with the specified geometry. + """ + return search.add_filter( + {"geometry": {"$geoIntersects": {"$geometry": intersects}}} + ) + + @staticmethod + def apply_stacql_filter( + search: MongoSearchAdapter, op: str, field: str, value: float + ): + """Filter search results based on a comparison between a field and a value. 
+
+        Args:
+            search (MongoSearchAdapter): The search adapter to apply the filter to.
+            op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal),
+                'lt' (less than), or 'lte' (less than or equal).
+            field (str): The field to perform the comparison on.
+            value (float): The value to compare the field against.
+
+        Returns:
+            search (MongoSearchAdapter): The search adapter with the specified filter applied.
+        """
+        # MongoDB comparison operators mapping
+        op_mapping = {
+            "eq": "$eq",
+            "gt": "$gt",
+            "gte": "$gte",
+            "lt": "$lt",
+            "lte": "$lte",
+        }
+
+        # Construct the MongoDB filter
+        if op in op_mapping:
+            mongo_op = op_mapping[op]
+            filter_condition = {field: {mongo_op: value}}
+        else:
+            raise ValueError(f"Unsupported operation '{op}'")
+
+        # Add the constructed filter to the search adapter's filters
+        search.add_filter(filter_condition)
+        return search
+
+    @staticmethod
+    def translate_clause_to_mongo(clause: filter.Clause) -> dict:
+        """Translate a CQL2 Clause object to a MongoDB query.
+
+        Args:
+            clause (Clause): The Clause object to translate.
+
+        Returns:
+            dict: The translated MongoDB query.
+        """
+        # This function needs to recursively translate CQL2 Clauses to MongoDB queries.
+        # Here we demonstrate a simple example of handling an "eq" operator.
+        if clause.op == filter.ComparisonOp.eq:
+            # Direct translation of an "eq" operation to MongoDB's query syntax
+            return {clause.args[0].property: {"$eq": clause.args[1]}}
+        elif clause.op == filter.SpatialIntersectsOp.s_intersects:
+            # Example of handling a spatial intersects operation
+            return {
+                clause.args[0].property: {
+                    "$geoIntersects": {
+                        "$geometry": clause.args[
+                            1
+                        ].__geo_interface__  # Assuming args[1] is a GeoJSON-pydantic model
+                    }
+                }
+            }
+        # Add additional elif blocks to handle other operators like "lt", "lte", "gt", "gte", "neq", etc.
+        else:
+            raise NotImplementedError(
+                f"Operator {clause.op} not implemented for MongoDB translation."
+            )
+
+    @staticmethod
+    def apply_cql2_filter(
+        search_adapter: MongoSearchAdapter, _filter: Optional[filter.Clause]
+    ):
+        """Adapt database logic to apply a CQL2 filter for MongoDB search endpoint.
+
+        Args:
+            search_adapter (MongoSearchAdapter): The search adapter to which the filter will be applied.
+            _filter (Optional[Clause]): A Clause representing the filter criteria.
+
+        Returns:
+            MongoSearchAdapter: The search adapter with the filter applied.
+        """
+        if _filter is None:
+            return search_adapter
+
+        # Translating the CQL2 Clause to a MongoDB query
+        try:
+            # Assuming _filter is a Clause object as defined above
+            mongo_query = DatabaseLogic.translate_clause_to_mongo(_filter)
+            search_adapter.add_filter(mongo_query)
+        except Exception as e:
+            # Handle translation errors or unsupported features
+            print(f"Error translating CQL2 Clause to MongoDB query: {e}")
+
+        return search_adapter
+
+    @staticmethod
+    def populate_sort(sortby: List[Dict[str, str]]) -> List[Tuple[str, int]]:
+        """
+        Transform a list of sort criteria into the format expected by MongoDB.
+
+        Args:
+            sortby (List[Dict[str, str]]): A list of dictionaries with 'field' and 'direction' keys, where
+                'direction' can be 'asc' for ascending or 'desc' for descending.
+
+        Returns:
+            List[Tuple[str, int]]: A list of tuples where each tuple is (fieldname, direction), with
+                direction being 1 for 'asc' and -1 for 'desc'. Returns an empty list
+                if no sort criteria are provided.
+        """
+        if not sortby:
+            return []
+
+        # MongoDB expects a list of tuples for sorting.
Each tuple is (Field Name, Direction) + # where Direction is 1 for ascending and -1 for descending. + mongo_sort = [] + for sort_field in sortby: + field = sort_field["field"] # The field name to sort by. + # Convert the direction to MongoDB's expected format. + direction = 1 if sort_field["direction"].lower() == "asc" else -1 + mongo_sort.append((field, direction)) + + return mongo_sort + + async def execute_search( + self, + search: MongoSearchAdapter, + limit: int, + token: Optional[str], + sort: Optional[Dict[str, Dict[str, str]]], + collection_ids: Optional[List[str]], + ignore_unavailable: bool = True, + ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: + """Execute a search query with limit and other optional parameters. + + Args: + search (Search): The search query to be executed. + limit (int): The maximum number of results to be returned. + token (Optional[str]): The token used to return the next set of results. + sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. + collection_ids (Optional[List[str]]): The collection ids to search. + ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. + + Returns: + Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: + - An iterable of search results, where each result is a dictionary with keys and values representing the + fields and values of each document. + - The total number of results (if the count could be computed), or None if the count could not be + computed. + - The token to be used to retrieve the next set of results, or None if there are no more results. + + Raises: + NotFoundError: If the collections specified in `collection_ids` do not exist. + """ + db = self.client[self.db_name] + collection = db["stac_items"] + query = {"$and": search.filters} if search.filters else {} + + if collection_ids: + query["collection"] = {"$in": collection_ids} + + sort_criteria = search.sort if search.sort else [("_id", 1)] # Default sort + + try: + if token: + last_id = ObjectId(base64.urlsafe_b64decode(token.encode()).decode()) + query["_id"] = {"$gt": last_id} + + cursor = collection.find(query).sort(sort_criteria).limit(limit + 1) + items = await cursor.to_list(length=limit + 1) + + next_token = None + if len(items) > limit: + next_token = base64.urlsafe_b64encode( + str(items[-1]["_id"]).encode() + ).decode() + items = items[:-1] + + maybe_count = None + if not token: + maybe_count = await collection.count_documents(query) + + return items, maybe_count, next_token + except PyMongoError as e: + print(f"Database operation failed: {e}") + raise + + """ TRANSACTION LOGIC """ + + async def check_collection_exists(self, collection_id: str): + """ + Check if a specific collection exists in the MongoDB database. + + This method uses MongoDB's `list_collection_names` command with a filter + to efficiently determine if a collection with the specified name exists. + It is more efficient than retrieving all collection names and searching through + them, especially beneficial in databases with a large number of collections. + + Args: + collection_id (str): The name of the collection to check for existence. + + Raises: + NotFoundError: If the collection specified by `collection_id` does not exist + in the database. + + Note: + The `NotFoundError` should be appropriately defined or imported in your + application to handle cases where the specified collection does not exist. 
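+
+            Note that Motor's ``list_collection_names`` is a coroutine; it must
+            be awaited, or the truthiness check below would always pass.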
+ """ + db = self.client[self.db_name] + + # Check for the collection's existence by filtering list_collection_names + collections = db.list_collection_names(filter={"name": collection_id}) + if not collections: + raise NotFoundError(f"Collection {collection_id} does not exist") + + async def prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Preps an item for insertion into the MongoDB database. + + Args: + item (Item): The item to be prepped for insertion. + base_url (str): The base URL used to create the item's self URL. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The prepped item. + + Raises: + ConflictError: If the item already exists in the database and exist_ok is False. + NotFoundError: If the collection specified by the item does not exist. + """ + db = self.client[self.db_name] + collections_collection = db[COLLECTIONS_INDEX] + items_collection = db[index_by_collection_id(item.collection)] + + # Check if the collection exists + collection_exists = await collections_collection.count_documents( + {"id": item.collection}, limit=1 + ) + if not collection_exists: + raise NotFoundError(f"Collection {item.collection} does not exist") + + # Transform item using item_serializer for MongoDB compatibility + mongo_item = self.item_serializer.stac_to_db(item, base_url) + + if not exist_ok: + existing_item = await items_collection.find_one({"id": mongo_item["id"]}) + if existing_item: + raise ConflictError( + f"Item {mongo_item['id']} in collection {mongo_item['collection']} already exists" + ) + + # Return the transformed item ready for insertion + return mongo_item + + def sync_prep_create_item( + self, item: Item, base_url: str, exist_ok: bool = False + ) -> Item: + """ + Preps an item for insertion into the MongoDB database in a synchronous manner. + + Args: + item (Item): The item to be prepped for insertion. + base_url (str): The base URL used to create the item's self URL. + exist_ok (bool): Indicates whether the item can exist already. + + Returns: + Item: The prepped item. + + Raises: + ConflictError: If the item already exists in the database and exist_ok is False. + NotFoundError: If the collection specified by the item does not exist. + """ + db = self.sync_client[self.db_name] + collections_collection = db[COLLECTIONS_INDEX] + items_collection = db[index_by_collection_id(item.collection)] + + # Check if the collection exists + collection_exists = collections_collection.count_documents( + {"id": item.collection}, limit=1 + ) + if not collection_exists: + raise NotFoundError(f"Collection {item.collection} does not exist") + + # Transform item using item_serializer for MongoDB compatibility + mongo_item = self.item_serializer.stac_to_db(item, base_url) + + if not exist_ok: + existing_item = items_collection.find_one({"id": mongo_item["id"]}) + if existing_item: + raise ConflictError( + f"Item {mongo_item['id']} in collection {mongo_item['collection']} already exists" + ) + + # Return the transformed item ready for insertion + return mongo_item + + async def create_item(self, item: Item, refresh: bool = False): + """ + Asynchronously inserts a STAC item into MongoDB, ensuring the item does not already exist. + + Args: + item (Item): The STAC item to be created. + refresh (bool, optional): Not used for MongoDB, kept for compatibility with Elasticsearch interface. + + Raises: + ConflictError: If the item with the same ID already exists within the collection. 
+ NotFoundError: If the specified collection does not exist in MongoDB. + """ + db = self.client[self.db_name] + items_collection = db[index_by_collection_id(item.collection)] + + # Convert STAC Item to a dictionary, preserving all its fields + item_dict = item.dict(by_alias=True) + + # Ensure the collection exists + collections_collection = db[COLLECTIONS_INDEX] + collection_exists = await collections_collection.count_documents( + {"id": item.collection}, limit=1 + ) + if collection_exists == 0: + raise NotFoundError(f"Collection {item.collection} does not exist") + + # Attempt to insert the item, checking for duplicates + try: + await items_collection.insert_one(item_dict) + except DuplicateKeyError: + raise ConflictError( + f"Item {item.id} in collection {item.collection} already exists" + ) + + async def delete_item( + self, item_id: str, collection_id: str, refresh: bool = False + ): + """ + Delete a single item from the database. + + Args: + item_id (str): The id of the Item to be deleted. + collection_id (str): The id of the Collection that the Item belongs to. + refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. + + Raises: + NotFoundError: If the Item does not exist in the database. + """ + db = self.client[self.db_name] + collection_name = index_by_collection_id( + collection_id + ) # Derive the MongoDB collection name + collection = db[collection_name] + + try: + # Attempt to delete the item from the collection + result = await collection.delete_one({"id": item_id}) + if result.deleted_count == 0: + # If no items were deleted, it means the item did not exist + raise NotFoundError( + f"Item {item_id} in collection {collection_id} not found" + ) + except PyMongoError as e: + # Catch any MongoDB error and re-raise as NotFoundError for consistency with the original function's behavior + raise NotFoundError( + f"Error deleting item {item_id} in collection {collection_id}: {e}" + ) + + async def create_collection(self, collection: Collection, refresh: bool = False): + """Create a single collection in the database. + + Args: + collection (Collection): The Collection object to be created. + refresh (bool, optional): Whether to refresh the index after the creation. Default is False. + + Raises: + ConflictError: If a Collection with the same id already exists in the database. + + Notes: + A new index is created for the items in the Collection using the `create_item_index` function. + """ + db = self.client[self.db_name] + collection_id = collection["id"] + + # Check if the collection already exists + if collection_id in await db.list_collection_names(): + raise ConflictError(f"Collection {collection_id} already exists") + + try: + # Since MongoDB creates collections when the first document is inserted, + # we can simulate collection creation by ensuring an index, such as on the 'id' field. + await db[collection_id].create_index([("id", 1)], unique=True) + except CollectionInvalid as e: + # If there's an error creating the index, it might indicate an issue with the collection. + # This block can be customized based on specific needs or errors. + raise ConflictError( + f"Error ensuring collection '{collection_id}' exists: {e}" + ) + except PyMongoError as e: + # Catch any other MongoDB error and raise a ConflictError for consistency with the original function's error handling. 
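+            # Transient network errors will also surface here; the wrapped
+            # message (from `e`) lets callers tell those cases apart if needed.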
+ raise ConflictError( + f"Error ensuring collection '{collection_id}' exists: {e}" + ) + + async def find_collection(self, collection_id: str) -> Collection: + """ + Find and return a collection from the database. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to be found. + + Returns: + Collection: The found collection, represented as a `Collection` object. + + Raises: + NotFoundError: If the collection with the given `collection_id` is not found in the database. + """ + db = self.client[self.db_name] + collections_collection = db[ + COLLECTIONS_INDEX + ] # Assuming COLLECTIONS_INDEX is defined elsewhere + + try: + collection = await collections_collection.find_one({"id": collection_id}) + if not collection: + raise NotFoundError(f"Collection {collection_id} not found") + return collection # Adjust this return according to how you want to use the Collection object + except PyMongoError as e: + # This is a general catch-all for MongoDB errors; adjust as needed for more specific handling + print(f"Failed to find collection {collection_id}: {e}") + raise NotFoundError(f"Collection {collection_id} not found") + + async def update_collection( + self, collection_id: str, collection: Collection, refresh: bool = False + ): + """ + Update a collection in the MongoDB database. + + Args: + collection_id (str): The ID of the collection to be updated. + collection (Collection): The new collection data to update. + refresh (bool): Not applicable for MongoDB, kept for compatibility. + + Raises: + NotFoundError: If the collection with the specified ID does not exist. + ConflictError: If attempting to change the collection ID to one that already exists. + """ + db = self.client.get_database() # Assuming the database is set in the client + collections_collection = db[ + COLLECTIONS_INDEX + ] # Assuming COLLECTIONS_INDEX is defined elsewhere + + existing_collection = await self.find_collection(collection_id) + if not existing_collection: + raise NotFoundError(f"Collection {collection_id} not found") + + if collection_id != collection["id"]: + # Check if the new ID already exists + new_id_exists = await collections_collection.find_one( + {"id": collection["id"]} + ) + if new_id_exists: + raise ConflictError( + f"Collection with ID {collection['id']} already exists" + ) + + # Update the collection ID in all related documents/items + items_collection = db[ITEMS_INDEX_PREFIX + collection_id] + await items_collection.update_many( + {}, {"$set": {"collection": collection["id"]}} + ) + + # Insert the new collection and delete the old one + await collections_collection.insert_one(collection) + await collections_collection.delete_one({"id": collection_id}) + + # Optionally, handle renaming or moving documents to a new collection if necessary + else: + # Update the existing collection with new data + await collections_collection.update_one( + {"id": collection_id}, {"$set": collection} + ) + + async def delete_collection(self, collection_id: str): + """ + Delete a collection from the MongoDB database and all items associated with it. + + Args: + collection_id (str): The ID of the collection to be deleted. 
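+
+        Raises:
+            NotFoundError: If no collection document with the given ID exists.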
+ """ + db = self.client.get_database() # Get the database + + # Attempt to delete the collection document + collection_result = await db["collections"].delete_one({"id": collection_id}) + if collection_result.deleted_count == 0: + raise NotFoundError(f"Collection {collection_id} not found") + + # Delete all items associated with the collection + await db["items"].delete_many({"collection": collection_id}) + + async def bulk_async( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database asynchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The + `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the + index is refreshed after the bulk insert. The function does not return any value. + """ + db = self.client.get_database() + items_collection = db["items"] + + # Prepare the documents for insertion + documents = [item.dict(by_alias=True) for item in processed_items] + + try: + await items_collection.insert_many(documents, ordered=False) + except BulkWriteError as e: + # Handle bulk write errors, e.g., due to duplicate keys + raise ConflictError(f"Bulk insert operation failed: {e.details}") + + def bulk_sync( + self, collection_id: str, processed_items: List[Item], refresh: bool = False + ) -> None: + """Perform a bulk insert of items into the database synchronously. + + Args: + self: The instance of the object calling this function. + collection_id (str): The ID of the collection to which the items belong. + processed_items (List[Item]): A list of `Item` objects to be inserted into the database. + refresh (bool): Whether to refresh the index after the bulk insert (default: False). + + Notes: + This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The + insert is performed synchronously and blocking, meaning that the function does not return until the insert has + completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to + True, the index is refreshed after the bulk insert. The function does not return any value. + """ + db = self.sync_client.get_database() + items_collection = db["items"] + + # Prepare the documents for insertion + documents = [item.dict(by_alias=True) for item in processed_items] + + try: + items_collection.insert_many(documents, ordered=False) + except BulkWriteError as e: + # Handle bulk write errors, e.g., due to duplicate keys + raise ConflictError(f"Bulk insert operation failed: {e.details}") + + async def delete_items(self) -> None: + """ + Danger. this is only for tests. + + Deletes all items from the 'items' collection in MongoDB. 
+ """ + db = self.client.get_default_database() # or get_database('yourdbname') + items_collection = db[ + "items" + ] # Assuming 'items' is the name of your items collection + + try: + await items_collection.delete_many({}) + print("All items have been deleted.") + except Exception as e: + print(f"Error deleting items: {e}") + + async def delete_collections(self) -> None: + """ + Danger. this is only for tests. + + Deletes all collections from the 'collections' collection in MongoDB. + """ + db = self.client.get_default_database() # or get_database('yourdbname') + collections_collection = db[ + "collections" + ] # Assuming 'collections' is the name of your collections collection + + try: + await collections_collection.delete_many({}) + print("All collections have been deleted.") + except Exception as e: + print(f"Error deleting collections: {e}") diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/version.py b/stac_fastapi/mongo/stac_fastapi/mongo/version.py similarity index 100% rename from stac_fastapi/mongo/stac_fastapi/opensearch/version.py rename to stac_fastapi/mongo/stac_fastapi/mongo/version.py diff --git a/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py deleted file mode 100644 index a946f82f..00000000 --- a/stac_fastapi/mongo/stac_fastapi/opensearch/database_logic.py +++ /dev/null @@ -1,908 +0,0 @@ -"""Database logic.""" -import asyncio -import logging -import os -from base64 import urlsafe_b64decode, urlsafe_b64encode -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union - -import attr -from opensearchpy import exceptions, helpers -from opensearchpy.exceptions import TransportError -from opensearchpy.helpers.query import Q -from opensearchpy.helpers.search import Search - -from stac_fastapi.core import serializers -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.utilities import bbox2polygon -from stac_fastapi.opensearch.config import ( - AsyncOpensearchSettings as AsyncSearchSettings, -) -from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings -from stac_fastapi.types.errors import ConflictError, NotFoundError -from stac_fastapi.types.stac import Collection, Item - -logger = logging.getLogger(__name__) - -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") -ES_INDEX_NAME_UNSUPPORTED_CHARS = { - "\\", - "/", - "*", - "?", - '"', - "<", - ">", - "|", - " ", - ",", - "#", - ":", -} - -ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" - -DEFAULT_SORT = { - "properties.datetime": {"order": "desc"}, - "id": {"order": "desc"}, - "collection": {"order": "desc"}, -} - -ES_ITEMS_SETTINGS = { - "index": { - "sort.field": list(DEFAULT_SORT.keys()), - "sort.order": [v["order"] for v in DEFAULT_SORT.values()], - } -} - -ES_MAPPINGS_DYNAMIC_TEMPLATES = [ - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - { - "descriptions": { - "match_mapping_type": "string", - "match": "description", - "mapping": {"type": "text"}, - } - }, - { - "titles": { - "match_mapping_type": "string", - "match": "title", - "mapping": {"type": "text"}, - } - }, - # Projection Extension https://github.com/stac-extensions/projection - {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, - { - "proj_projjson": { - "match": "proj:projjson", - "mapping": {"type": 
"object", "enabled": False}, - } - }, - { - "proj_centroid": { - "match": "proj:centroid", - "mapping": {"type": "geo_point"}, - } - }, - { - "proj_geometry": { - "match": "proj:geometry", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "no_index_href": { - "match": "href", - "mapping": {"type": "text", "index": False}, - } - }, - # Default all other strings not otherwise specified to keyword - {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, - {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, -] - -ES_ITEMS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "collection": {"type": "keyword"}, - "geometry": {"type": "geo_shape"}, - "assets": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "properties": { - "type": "object", - "properties": { - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": "date"}, - "start_datetime": {"type": "date"}, - "end_datetime": {"type": "date"}, - "created": {"type": "date"}, - "updated": {"type": "date"}, - # Satellite Extension https://github.com/stac-extensions/sat - "sat:absolute_orbit": {"type": "integer"}, - "sat:relative_orbit": {"type": "integer"}, - }, - }, - }, -} - -ES_COLLECTIONS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "extent.spatial.bbox": {"type": "long"}, - "extent.temporal.interval": {"type": "date"}, - "providers": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "item_assets": {"type": "object", "enabled": False}, - }, -} - - -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. - """ - if collection_ids is None: - return ITEM_INDICES - else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) - - -async def create_collection_index() -> None: - """ - Create the index for a Collection. - - Returns: - None - - """ - client = AsyncSearchSettings().create_client - - search_body = { - "mappings": ES_COLLECTIONS_MAPPINGS, - "aliases": {COLLECTIONS_INDEX: {}}, - } - - index = f"{COLLECTIONS_INDEX}-000001" - - try: - await client.indices.create(index=index, body=search_body) - except TransportError as e: - if e.status_code == 400: - pass # Ignore 400 status codes - else: - raise e - - await client.close() - - -async def create_item_index(collection_id: str): - """ - Create the index for Items. - - Args: - collection_id (str): Collection identifier. 
- - Returns: - None - - """ - client = AsyncSearchSettings().create_client - index_name = index_by_collection_id(collection_id) - search_body = { - "aliases": {index_name: {}}, - "mappings": ES_ITEMS_MAPPINGS, - "settings": ES_ITEMS_SETTINGS, - } - - try: - await client.indices.create(index=f"{index_name}-000001", body=search_body) - except TransportError as e: - if e.status_code == 400: - pass # Ignore 400 status codes - else: - raise e - - await client.close() - - -async def delete_item_index(collection_id: str): - """Delete the index for items in a collection. - - Args: - collection_id (str): The ID of the collection whose items index will be deleted. - """ - client = AsyncSearchSettings().create_client - - name = index_by_collection_id(collection_id) - resolved = await client.indices.resolve_index(name=name) - if "aliases" in resolved and resolved["aliases"]: - [alias] = resolved["aliases"] - await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) - await client.indices.delete(index=alias["indices"]) - else: - await client.indices.delete(index=name) - await client.close() - - -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]): - """Create Elasticsearch bulk actions for a list of processed items. - - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. - - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. - """ - return [ - { - "_index": index_by_collection_id(collection_id), - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, - } - for item in processed_items - ] - - -# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, -# So create our own Protocol for typing -# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - -@attr.s -class DatabaseLogic: - """Database logic.""" - - client = AsyncSearchSettings().create_client - sync_client = SyncSearchSettings().create_client - - item_serializer: Type[serializers.ItemSerializer] = attr.ib( - default=serializers.ItemSerializer - ) - collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( - default=serializers.CollectionSerializer - ) - - """CORE LOGIC""" - - async def get_all_collections( - self, - token: Optional[str], - limit: int, - ) -> Iterable[Dict[str, Any]]: - """Retrieve a list of all collections from the database. - - Args: - token (Optional[str]): The token used to return the next set of results. - limit (int): Number of results to return - - Returns: - collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection. 
- - Notes: - The collections are retrieved from the Elasticsearch database using the `client.search` method, - with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records. - The result is a generator of dictionaries containing the source data for each collection. - """ - search_body: Dict[str, Any] = {} - if token: - search_after = urlsafe_b64decode(token.encode()).decode().split(",") - search_body["search_after"] = search_after - - search_body["sort"] = {"id": {"order": "asc"}} - - collections = await self.client.search( - index=COLLECTIONS_INDEX, body=search_body, size=limit - ) - hits = collections["hits"]["hits"] - return hits - - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: - """Retrieve a single item from the database. - - Args: - collection_id (str): The id of the Collection that the Item belongs to. - item_id (str): The id of the Item. - - Returns: - item (Dict): A dictionary containing the source data for the Item. - - Raises: - NotFoundError: If the specified Item does not exist in the Collection. - - Notes: - The Item is retrieved from the Elasticsearch database using the `client.get` method, - with the index for the Collection as the target index and the combined `mk_item_id` as the document id. - """ - try: - item = await self.client.get( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" - ) - return item["_source"] - - @staticmethod - def make_search(): - """Database logic to create a Search instance.""" - return Search().sort(*DEFAULT_SORT) - - @staticmethod - def apply_ids_filter(search: Search, item_ids: List[str]): - """Database logic to search a list of STAC item ids.""" - return search.filter("terms", id=item_ids) - - @staticmethod - def apply_collections_filter(search: Search, collection_ids: List[str]): - """Database logic to search a list of STAC collection ids.""" - return search.filter("terms", collection=collection_ids) - - @staticmethod - def apply_datetime_filter(search: Search, datetime_search): - """Apply a filter to search based on datetime field. - - Args: - search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. - - Returns: - Search: The filtered search object. - """ - if "eq" in datetime_search: - search = search.filter( - "term", **{"properties__datetime": datetime_search["eq"]} - ) - else: - search = search.filter( - "range", properties__datetime={"lte": datetime_search["lte"]} - ) - search = search.filter( - "range", properties__datetime={"gte": datetime_search["gte"]} - ) - return search - - @staticmethod - def apply_bbox_filter(search: Search, bbox: List): - """Filter search results based on bounding box. - - Args: - search (Search): The search object to apply the filter to. - bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. - - Returns: - search (Search): The search object with the bounding box filter applied. - - Notes: - The bounding box is transformed into a polygon using the `bbox2polygon` function and - a geo_shape filter is added to the search object, set to intersect with the specified polygon. 
- """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": "polygon", - "coordinates": bbox2polygon(*bbox), - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_intersects_filter( - search: Search, - intersects: Geometry, - ): - """Filter search results based on intersecting geometry. - - Args: - search (Search): The search object to apply the filter to. - intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. - - Returns: - search (Search): The search object with the intersecting geometry filter applied. - - Notes: - A geo_shape filter is added to the search object, set to intersect with the specified geometry. - """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": intersects.type.lower(), - "coordinates": intersects.coordinates, - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_stacql_filter(search: Search, op: str, field: str, value: float): - """Filter search results based on a comparison between a field and a value. - - Args: - search (Search): The search object to apply the filter to. - op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), - 'lt' (less than), or 'lte' (less than or equal). - field (str): The field to perform the comparison on. - value (float): The value to compare the field against. - - Returns: - search (Search): The search object with the specified filter applied. - """ - if op != "eq": - key_filter = {field: {f"{op}": value}} - search = search.filter(Q("range", **key_filter)) - else: - search = search.filter("term", **{field: value}) - - return search - - @staticmethod - def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): - """Database logic to perform query for search endpoint.""" - if _filter is not None: - search = search.filter(filter.Clause.parse_obj(_filter).to_es()) - return search - - @staticmethod - def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: - """Database logic to sort search instance.""" - if sortby: - return {s.field: {"order": s.direction} for s in sortby} - else: - return None - - async def execute_search( - self, - search: Search, - limit: int, - token: Optional[str], - sort: Optional[Dict[str, Dict[str, str]]], - collection_ids: Optional[List[str]], - ignore_unavailable: bool = True, - ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: - """Execute a search query with limit and other optional parameters. - - Args: - search (Search): The search query to be executed. - limit (int): The maximum number of results to be returned. - token (Optional[str]): The token used to return the next set of results. - sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. - collection_ids (Optional[List[str]]): The collection ids to search. - ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. - - Returns: - Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: - - An iterable of search results, where each result is a dictionary with keys and values representing the - fields and values of each document. - - The total number of results (if the count could be computed), or None if the count could not be - computed. - - The token to be used to retrieve the next set of results, or None if there are no more results. 
- - Raises: - NotFoundError: If the collections specified in `collection_ids` do not exist. - """ - search_body: Dict[str, Any] = {} - query = search.query.to_dict() if search.query else None - if query: - search_body["query"] = query - if token: - search_after = urlsafe_b64decode(token.encode()).decode().split(",") - search_body["search_after"] = search_after - search_body["sort"] = sort if sort else DEFAULT_SORT - - index_param = indices(collection_ids) - - search_task = asyncio.create_task( - self.client.search( - index=index_param, - ignore_unavailable=ignore_unavailable, - body=search_body, - size=limit, - ) - ) - - count_task = asyncio.create_task( - self.client.count( - index=index_param, - ignore_unavailable=ignore_unavailable, - body=search.to_dict(count=True), - ) - ) - - try: - es_response = await search_task - except exceptions.NotFoundError: - raise NotFoundError(f"Collections '{collection_ids}' do not exist") - - hits = es_response["hits"]["hits"] - items = (hit["_source"] for hit in hits) - - next_token = None - if hits and (sort_array := hits[-1].get("sort")): - next_token = urlsafe_b64encode( - ",".join([str(x) for x in sort_array]).encode() - ).decode() - - # (1) count should not block returning results, so don't wait for it to be done - # (2) don't cancel the task so that it will populate the ES cache for subsequent counts - maybe_count = None - if count_task.done(): - try: - maybe_count = count_task.result().get("count") - except Exception as e: - logger.error(f"Count task failed: {e}") - - return items, maybe_count, next_token - - """ TRANSACTION LOGIC """ - - async def check_collection_exists(self, collection_id: str): - """Database logic to check if a collection exists.""" - if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - async def prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Preps an item for insertion into the database. - - Args: - item (Item): The item to be prepped for insertion. - base_url (str): The base URL used to create the item's self URL. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The prepped item. - - Raises: - ConflictError: If the item already exists in the database. - - """ - await self.check_collection_exists(collection_id=item["collection"]) - - if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), - id=mk_item_id(item["id"], item["collection"]), - ): - raise ConflictError( - f"Item {item['id']} in collection {item['collection']} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - def sync_prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Prepare an item for insertion into the database. - - This method performs pre-insertion preparation on the given `item`, - such as checking if the collection the item belongs to exists, - and optionally verifying that an item with the same ID does not already exist in the database. - - Args: - item (Item): The item to be inserted into the database. - base_url (str): The base URL used for constructing URLs for the item. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The item after preparation is done. - - Raises: - NotFoundError: If the collection that the item belongs to does not exist in the database. 
- ConflictError: If an item with the same ID already exists in the collection. - """ - item_id = item["id"] - collection_id = item["collection"] - if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ): - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - async def create_item(self, item: Item, refresh: bool = False): - """Database logic for creating one item. - - Args: - item (Item): The item to be created. - refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. - - Raises: - ConflictError: If the item already exists in the database. - - Returns: - None - """ - # todo: check if collection exists, but cache - item_id = item["id"] - collection_id = item["collection"] - es_resp = await self.client.index( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - body=item, - refresh=refresh, - ) - - if (meta := es_resp.get("meta")) and meta.get("status") == 409: - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - async def delete_item( - self, item_id: str, collection_id: str, refresh: bool = False - ): - """Delete a single item from the database. - - Args: - item_id (str): The id of the Item to be deleted. - collection_id (str): The id of the Collection that the Item belongs to. - refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. - - Raises: - NotFoundError: If the Item does not exist in the database. - """ - try: - await self.client.delete( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - refresh=refresh, - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} in collection {collection_id} not found" - ) - - async def create_collection(self, collection: Collection, refresh: bool = False): - """Create a single collection in the database. - - Args: - collection (Collection): The Collection object to be created. - refresh (bool, optional): Whether to refresh the index after the creation. Default is False. - - Raises: - ConflictError: If a Collection with the same id already exists in the database. - - Notes: - A new index is created for the items in the Collection using the `create_item_index` function. - """ - collection_id = collection["id"] - - if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise ConflictError(f"Collection {collection_id} already exists") - - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - body=collection, - refresh=refresh, - ) - - await create_item_index(collection_id) - - async def find_collection(self, collection_id: str) -> Collection: - """Find and return a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be found. - - Returns: - Collection: The found collection, represented as a `Collection` object. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function searches for a collection in the database using the specified `collection_id` and returns the found - collection as a `Collection` object. 
If the collection is not found, a `NotFoundError` is raised. - """ - try: - collection = await self.client.get( - index=COLLECTIONS_INDEX, id=collection_id - ) - except exceptions.NotFoundError: - raise NotFoundError(f"Collection {collection_id} not found") - - return collection["_source"] - - async def update_collection( - self, collection_id: str, collection: Collection, refresh: bool = False - ): - """Update a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be updated. - collection (Collection): The Collection object to be used for the update. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not - found in the database. - - Notes: - This function updates the collection in the database using the specified - `collection_id` and with the collection specified in the `Collection` object. - If the collection is not found, a `NotFoundError` is raised. - """ - await self.find_collection(collection_id=collection_id) - - if collection_id != collection["id"]: - await self.create_collection(collection, refresh=refresh) - - await self.client.reindex( - body={ - "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, - "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, - "script": { - "lang": "painless", - "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", - }, - }, - wait_for_completion=True, - refresh=refresh, - ) - - await self.delete_collection(collection_id) - - else: - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - body=collection, - refresh=refresh, - ) - - async def delete_collection(self, collection_id: str, refresh: bool = False): - """Delete a collection from the database. - - Parameters: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be deleted. - refresh (bool): Whether to refresh the index after the deletion (default: False). - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function first verifies that the collection with the specified `collection_id` exists in the database, and then - deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this - function also calls `delete_item_index` to delete the index for the items in the collection. - """ - await self.find_collection(collection_id=collection_id) - await self.client.delete( - index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh - ) - await delete_item_index(collection_id) - - async def bulk_async( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database asynchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. 
The - `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the - index is refreshed after the bulk insert. The function does not return any value. - """ - await helpers.async_bulk( - self.client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - def bulk_sync( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database synchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed synchronously and blocking, meaning that the function does not return until the insert has - completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to - True, the index is refreshed after the bulk insert. The function does not return any value. - """ - helpers.bulk( - self.sync_client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - # DANGER - async def delete_items(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=ITEM_INDICES, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) - - # DANGER - async def delete_collections(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=COLLECTIONS_INDEX, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) From 7b2a764ce51321234e1b559cb66afe5260b98c7a Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 20 Feb 2024 13:01:02 +0800 Subject: [PATCH 06/25] app starts --- docker-compose.yml | 10 ++-- stac_fastapi/mongo/setup.cfg | 2 +- stac_fastapi/mongo/setup.py | 3 +- stac_fastapi/mongo/stac_fastapi/mongo/app.py | 4 +- .../mongo/stac_fastapi/mongo/config.py | 47 ++++++++++--------- .../stac_fastapi/mongo/database_logic.py | 7 +-- 6 files changed, 40 insertions(+), 33 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index be90928c..cb7adc09 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -116,8 +116,8 @@ services: image: mongo:7.0.5 hostname: mongo environment: - - MONGO_INITDB_ROOT_USERNAME: root - - MONGO_INITDB_ROOT_PASSWORD: example + - MONGO_INITDB_ROOT_USERNAME=root + - MONGO_INITDB_ROOT_PASSWORD=example ports: - "27017:27017" @@ -127,6 +127,6 @@ services: ports: - "8081:8081" environment: - - ME_CONFIG_MONGODB_ADMINUSERNAME: root - - ME_CONFIG_MONGODB_ADMINPASSWORD: example - - ME_CONFIG_MONGODB_URL: mongodb://root:example@mongo:27017/ \ No newline at end of file + - ME_CONFIG_MONGODB_ADMINUSERNAME=root + - ME_CONFIG_MONGODB_ADMINPASSWORD=example + - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/ \ No newline at end of file diff --git a/stac_fastapi/mongo/setup.cfg b/stac_fastapi/mongo/setup.cfg index 9f0be4b7..a234c94b 100644 --- a/stac_fastapi/mongo/setup.cfg +++ b/stac_fastapi/mongo/setup.cfg @@ -1,2 +1,2 @@ [metadata] -version = attr: stac_fastapi.opensearch.version.__version__ +version = attr: stac_fastapi.mongo.version.__version__ diff --git a/stac_fastapi/mongo/setup.py 
b/stac_fastapi/mongo/setup.py index 5be4ad41..159c3803 100644 --- a/stac_fastapi/mongo/setup.py +++ b/stac_fastapi/mongo/setup.py @@ -8,7 +8,8 @@ install_requires = [ "stac-fastapi.core==2.0.0", "motor==3.3.2", - "pymongo==4.6.1" "uvicorn", + "pymongo==4.6.1", + "uvicorn", "starlette", ] diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/app.py b/stac_fastapi/mongo/stac_fastapi/mongo/app.py index 0d845c50..e7ac8f49 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/app.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/app.py @@ -22,7 +22,7 @@ from stac_fastapi.mongo.config import AsyncMongoDBSettings from stac_fastapi.mongo.database_logic import DatabaseLogic, create_collection_index -settings = AsyncMongoDBSettings +settings = AsyncMongoDBSettings() session = Session.create_from_settings(settings) filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) @@ -79,7 +79,7 @@ def run() -> None: import uvicorn uvicorn.run( - "stac_fastapi.opensearch.app:app", + "stac_fastapi.mongo.app:app", host=settings.app_host, port=settings.app_port, log_level="info", diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/config.py b/stac_fastapi/mongo/stac_fastapi/mongo/config.py index e9e12a1a..94fb1df7 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/config.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/config.py @@ -1,6 +1,7 @@ """API configuration.""" import os -from typing import Set +import ssl +from typing import Any, Dict, Set from motor.motor_asyncio import AsyncIOMotorClient from pymongo import MongoClient @@ -8,58 +9,62 @@ from stac_fastapi.types.config import ApiSettings -def _mongodb_uri() -> str: - # MongoDB connection URI construction +def _mongodb_config() -> Dict[str, Any]: + # MongoDB connection URI and client options user = os.getenv("MONGO_USER") password = os.getenv("MONGO_PASS") host = os.getenv("MONGO_HOST", "localhost") port = os.getenv("MONGO_PORT", "27017") - database = os.getenv( - "MONGO_DB", "admin" - ) # Default to admin database for authentication + database = os.getenv("MONGO_DB", "stac") # Default to 'stac' database use_ssl = os.getenv("MONGO_USE_SSL", "false").lower() == "true" - ssl_cert_reqs = ( - "CERT_NONE" - if os.getenv("MONGO_VERIFY_CERTS", "false").lower() == "false" - else "CERT_REQUIRED" - ) + verify_certs = os.getenv("MONGO_VERIFY_CERTS", "true").lower() == "true" + + ssl_cert_reqs = ssl.CERT_REQUIRED if verify_certs else ssl.CERT_NONE # Adjust URI based on whether using SRV record or not if "mongodb+srv" in os.getenv("MONGO_CONNECTION_STRING", ""): - # SRV connection string format does not use port uri = f"mongodb+srv://{user}:{password}@{host}/{database}?retryWrites=true&w=majority" else: - # Standard connection string format with port uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?retryWrites=true" if use_ssl: - uri += f"&ssl=true&ssl_cert_reqs={ssl_cert_reqs}" + uri += "&ssl=true&ssl_cert_reqs={}".format(ssl_cert_reqs) + + # Initialize the configuration dictionary + config = { + "uri": uri, + "database": database, + # MongoDB does not use headers, but added here for structure alignment + "headers": {}, # Placeholder for consistency + } - return uri + return config _forbidden_fields: Set[str] = {"type"} class MongoDBSettings(ApiSettings): - """API settings.""" + """MongoDB specific API settings.""" forbidden_fields: Set[str] = _forbidden_fields indexed_fields: Set[str] = {"datetime"} @property def create_client(self) -> MongoClient: - """Create MongoDB client.""" - return MongoClient(_mongodb_uri()) + """Create a synchronous MongoDB 
client.""" + config = _mongodb_config() + return MongoClient(config["uri"]) class AsyncMongoDBSettings(ApiSettings): - """API settings.""" + """Async MongoDB specific API settings.""" forbidden_fields: Set[str] = _forbidden_fields indexed_fields: Set[str] = {"datetime"} @property def create_client(self) -> AsyncIOMotorClient: - """Create async MongoDB client.""" - return AsyncIOMotorClient(_mongodb_uri()) + """Create an asynchronous MongoDB client.""" + config = _mongodb_config() + return AsyncIOMotorClient(config["uri"]) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index 6732b43d..ae276687 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -70,8 +70,9 @@ async def create_collection_index(): Returns: None """ - client = AsyncSearchSettings.create_client() + client = AsyncSearchSettings().create_client db = client[DATABASE] + # db = client.get_database(DATABASE) try: await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) @@ -93,7 +94,7 @@ async def create_item_index(collection_id: str): Returns: None """ - client = AsyncSearchSettings.create_client() + client = AsyncSearchSettings.create_client db = client[DATABASE] # Derive the collection name for items based on the collection_id @@ -123,7 +124,7 @@ async def delete_item_index(collection_id: str): Args: collection_id (str): The ID of the collection whose associated MongoDB collection will be dropped. """ - client = AsyncSearchSettings.create_client() + client = AsyncSearchSettings.create_client db = client[DATABASE] # Derive the MongoDB collection name using the collection ID From 4c93b45e18971e8fa36862e3f5373c7d8781110f Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Tue, 20 Feb 2024 19:11:47 +0800 Subject: [PATCH 07/25] root route --- README.md | 2 +- docker-compose.yml | 42 ++++++++++--------- stac_fastapi/mongo/stac_fastapi/mongo/app.py | 2 + .../mongo/stac_fastapi/mongo/config.py | 14 +++++-- .../stac_fastapi/mongo/database_logic.py | 42 ++++++++++++++----- 5 files changed, 66 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 7c662480..08782637 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # stac-fastapi-elasticsearch-opensearch (sfeos) -## Elasticsearch and Opensearch backends for the stac-fastapi project +## Elasticsearch, Opensearch and Mongo backends for the stac-fastapi project [![PyPI version](https://badge.fury.io/py/stac-fastapi.elasticsearch.svg)](https://badge.fury.io/py/stac-fastapi.elasticsearch) diff --git a/docker-compose.yml b/docker-compose.yml index cb7adc09..696ac303 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,8 +73,10 @@ services: - ENVIRONMENT=local - BACKEND=mongo - MONGO_DB=stac + - MONGO_HOST=mongo - MONGO_USER=root - MONGO_PASS=example + - MONGO_PORT=27017 ports: - "8084:8084" volumes: @@ -85,6 +87,26 @@ services: command: bash -c "./scripts/wait-for-it-es.sh mongo-container:27017 && python -m stac_fastapi.mongo.app" + mongo: + container_name: mongo-container + image: mongo:7.0.5 + hostname: mongo + environment: + - MONGO_INITDB_ROOT_USERNAME=root + - MONGO_INITDB_ROOT_PASSWORD=example + ports: + - "27017:27017" + + mongo-express: + image: mongo-express + restart: always + ports: + - "8081:8081" + environment: + - ME_CONFIG_MONGODB_ADMINUSERNAME=root + - ME_CONFIG_MONGODB_ADMINPASSWORD=example + - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/ + elasticsearch: container_name: 
es-container image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.11.0} @@ -110,23 +132,3 @@ services: - ./opensearch/snapshots:/usr/share/opensearch/snapshots ports: - "9202:9202" - - mongo: - container_name: mongo-container - image: mongo:7.0.5 - hostname: mongo - environment: - - MONGO_INITDB_ROOT_USERNAME=root - - MONGO_INITDB_ROOT_PASSWORD=example - ports: - - "27017:27017" - - mongo-express: - image: mongo-express - restart: always - ports: - - "8081:8081" - environment: - - ME_CONFIG_MONGODB_ADMINUSERNAME=root - - ME_CONFIG_MONGODB_ADMINPASSWORD=example - - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/ \ No newline at end of file diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/app.py b/stac_fastapi/mongo/stac_fastapi/mongo/app.py index e7ac8f49..298148df 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/app.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/app.py @@ -78,6 +78,8 @@ def run() -> None: try: import uvicorn + print("host: ", settings.app_host) + print("port: ", settings.app_port) uvicorn.run( "stac_fastapi.mongo.app:app", host=settings.app_host, diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/config.py b/stac_fastapi/mongo/stac_fastapi/mongo/config.py index 94fb1df7..8dbc4f58 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/config.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/config.py @@ -15,17 +15,22 @@ def _mongodb_config() -> Dict[str, Any]: password = os.getenv("MONGO_PASS") host = os.getenv("MONGO_HOST", "localhost") port = os.getenv("MONGO_PORT", "27017") - database = os.getenv("MONGO_DB", "stac") # Default to 'stac' database + # database = os.getenv("MONGO_DB", "stac") # Default to 'stac' database use_ssl = os.getenv("MONGO_USE_SSL", "false").lower() == "true" verify_certs = os.getenv("MONGO_VERIFY_CERTS", "true").lower() == "true" ssl_cert_reqs = ssl.CERT_REQUIRED if verify_certs else ssl.CERT_NONE # Adjust URI based on whether using SRV record or not + # if "mongodb+srv" in os.getenv("MONGO_CONNECTION_STRING", ""): + # uri = f"mongodb+srv://{user}:{password}@{host}/{database}?retryWrites=true&w=majority" + # else: + # uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?retryWrites=true" + if "mongodb+srv" in os.getenv("MONGO_CONNECTION_STRING", ""): - uri = f"mongodb+srv://{user}:{password}@{host}/{database}?retryWrites=true&w=majority" + uri = f"mongodb+srv://{user}:{password}@{host}?retryWrites=true&w=majority" else: - uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?retryWrites=true" + uri = f"mongodb://{user}:{password}@{host}:{port}?retryWrites=true" if use_ssl: uri += "&ssl=true&ssl_cert_reqs={}".format(ssl_cert_reqs) @@ -33,7 +38,7 @@ def _mongodb_config() -> Dict[str, Any]: # Initialize the configuration dictionary config = { "uri": uri, - "database": database, + # "database": database, # MongoDB does not use headers, but added here for structure alignment "headers": {}, # Placeholder for consistency } @@ -67,4 +72,5 @@ class AsyncMongoDBSettings(ApiSettings): def create_client(self) -> AsyncIOMotorClient: """Create an asynchronous MongoDB client.""" config = _mongodb_config() + print(config) return AsyncIOMotorClient(config["uri"]) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index ae276687..621496a3 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -71,17 +71,37 @@ async def create_collection_index(): None """ client = 
AsyncSearchSettings().create_client - db = client[DATABASE] - # db = client.get_database(DATABASE) + if client: + try: + db = client[DATABASE] + await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) + print("Index created successfully.") + except Exception as e: + print(f"An error occurred while creating the index: {e}") + finally: + print(f"Closing client: {client}") + client.close() + else: + print("Failed to create MongoDB client.") - try: - await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) - print("Index created successfully.") - except Exception as e: - # Handle exceptions, which could be due to existing index conflicts, etc. - print(f"An error occurred while creating the index: {e}") - finally: - await client.close() + # async with AsyncSearchSettings() as client: + # db = client[DATABASE] + # await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) + # print("Index created successfully.") + + # db = client[DATABASE] + # print("HELLO") + # print(client) + # # db = client.get_database(DATABASE) + + # try: + # await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) + # print("Index created successfully.") + # except Exception as e: + # # Handle exceptions, which could be due to existing index conflicts, etc. + # print(f"An error occurred while creating the index: {e}") + # finally: + # await client.close() async def create_item_index(collection_id: str): @@ -254,7 +274,7 @@ async def get_all_collections( with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records. The result is a generator of dictionaries containing the source data for each collection. """ - db = self.client[self.db_name] + db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] query: Dict[str, Any] = {} From 824e39723c1555a9c582f176ab11fefc1c9804de Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 21 Feb 2024 05:46:47 +0800 Subject: [PATCH 08/25] post get collection --- .../stac_fastapi/mongo/database_logic.py | 132 +++++++----------- .../mongo/stac_fastapi/mongo/utilities.py | 13 ++ stac_fastapi/tests/conftest.py | 4 + stac_fastapi/tests/resources/test_item.py | 2 + 4 files changed, 67 insertions(+), 84 deletions(-) create mode 100644 stac_fastapi/mongo/stac_fastapi/mongo/utilities.py diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index 621496a3..d717a95a 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -8,9 +8,8 @@ import attr from bson import ObjectId -from pymongo.errors import ( +from pymongo.errors import ( # CollectionInvalid, BulkWriteError, - CollectionInvalid, DuplicateKeyError, PyMongoError, ) @@ -20,6 +19,7 @@ from stac_fastapi.core.utilities import bbox2polygon from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSearchSettings from stac_fastapi.mongo.config import MongoDBSettings as SyncSearchSettings +from stac_fastapi.mongo.utilities import serialize_doc from stac_fastapi.types.errors import ConflictError, NotFoundError from stac_fastapi.types.stac import Collection, Item @@ -28,6 +28,7 @@ NumType = Union[float, int] COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") +ITEMS_INDEX = os.getenv("STAC_ITEMS_INDEX", "items") ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") DATABASE = os.getenv("MONGO_DB", "admin") @@ -84,25 +85,6 @@ async def create_collection_index(): else: print("Failed to 
create MongoDB client.") - # async with AsyncSearchSettings() as client: - # db = client[DATABASE] - # await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) - # print("Index created successfully.") - - # db = client[DATABASE] - # print("HELLO") - # print(client) - # # db = client.get_database(DATABASE) - - # try: - # await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) - # print("Index created successfully.") - # except Exception as e: - # # Handle exceptions, which could be due to existing index conflicts, etc. - # print(f"An error occurred while creating the index: {e}") - # finally: - # await client.close() - async def create_item_index(collection_id: str): """ @@ -249,10 +231,6 @@ class DatabaseLogic: default=serializers.CollectionSerializer ) - def __init__(self): - """Init.""" - self.db_name = os.getenv("MONGO_DB", "default_db_name") - """CORE LOGIC""" async def get_all_collections( @@ -305,7 +283,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: NotFoundError: If the specified Item does not exist in the Collection. """ - db = self.client[self.db_name] + db = self.client[DATABASE] collection_name = index_by_collection_id(collection_id) try: @@ -565,7 +543,7 @@ async def execute_search( Raises: NotFoundError: If the collections specified in `collection_ids` do not exist. """ - db = self.client[self.db_name] + db = self.client[DATABASE] collection = db["stac_items"] query = {"$and": search.filters} if search.filters else {} @@ -620,7 +598,7 @@ async def check_collection_exists(self, collection_id: str): The `NotFoundError` should be appropriately defined or imported in your application to handle cases where the specified collection does not exist. """ - db = self.client[self.db_name] + db = self.client[DATABASE] # Check for the collection's existence by filtering list_collection_names collections = db.list_collection_names(filter={"name": collection_id}) @@ -645,16 +623,16 @@ async def prep_create_item( ConflictError: If the item already exists in the database and exist_ok is False. NotFoundError: If the collection specified by the item does not exist. """ - db = self.client[self.db_name] + db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] - items_collection = db[index_by_collection_id(item.collection)] + items_collection = db[ITEMS_INDEX] # Check if the collection exists collection_exists = await collections_collection.count_documents( - {"id": item.collection}, limit=1 + {"id": item["collection"]}, limit=1 ) if not collection_exists: - raise NotFoundError(f"Collection {item.collection} does not exist") + raise NotFoundError(f"Collection {item['collection']} does not exist") # Transform item using item_serializer for MongoDB compatibility mongo_item = self.item_serializer.stac_to_db(item, base_url) @@ -687,7 +665,7 @@ def sync_prep_create_item( ConflictError: If the item already exists in the database and exist_ok is False. NotFoundError: If the collection specified by the item does not exist. """ - db = self.sync_client[self.db_name] + db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] items_collection = db[index_by_collection_id(item.collection)] @@ -723,26 +701,26 @@ async def create_item(self, item: Item, refresh: bool = False): ConflictError: If the item with the same ID already exists within the collection. NotFoundError: If the specified collection does not exist in MongoDB. 
""" - db = self.client[self.db_name] - items_collection = db[index_by_collection_id(item.collection)] + db = self.client[DATABASE] + items_collection = db[ITEMS_INDEX] # Convert STAC Item to a dictionary, preserving all its fields - item_dict = item.dict(by_alias=True) + # item_dict = item.dict(by_alias=True) # Ensure the collection exists collections_collection = db[COLLECTIONS_INDEX] collection_exists = await collections_collection.count_documents( - {"id": item.collection}, limit=1 + {"id": item["collection"]}, limit=1 ) if collection_exists == 0: - raise NotFoundError(f"Collection {item.collection} does not exist") + raise NotFoundError(f"Collection {item['collection']} does not exist") # Attempt to insert the item, checking for duplicates try: - await items_collection.insert_one(item_dict) + await items_collection.insert_one(item) except DuplicateKeyError: raise ConflictError( - f"Item {item.id} in collection {item.collection} already exists" + f"Item {item['id']} in collection {item['collection']} already exists" ) async def delete_item( @@ -759,7 +737,7 @@ async def delete_item( Raises: NotFoundError: If the Item does not exist in the database. """ - db = self.client[self.db_name] + db = self.client[DATABASE] collection_name = index_by_collection_id( collection_id ) # Derive the MongoDB collection name @@ -780,7 +758,7 @@ async def delete_item( ) async def create_collection(self, collection: Collection, refresh: bool = False): - """Create a single collection in the database. + """Create a single collection document in the database. Args: collection (Collection): The Collection object to be created. @@ -788,34 +766,26 @@ async def create_collection(self, collection: Collection, refresh: bool = False) Raises: ConflictError: If a Collection with the same id already exists in the database. - - Notes: - A new index is created for the items in the Collection using the `create_item_index` function. """ - db = self.client[self.db_name] - collection_id = collection["id"] + db = self.client[DATABASE] + collections_collection = db[COLLECTIONS_INDEX] # Check if the collection already exists - if collection_id in await db.list_collection_names(): - raise ConflictError(f"Collection {collection_id} already exists") + existing_collection = await collections_collection.find_one( + {"id": collection["id"]} + ) + if existing_collection: + raise ConflictError(f"Collection {collection['id']} already exists") try: - # Since MongoDB creates collections when the first document is inserted, - # we can simulate collection creation by ensuring an index, such as on the 'id' field. - await db[collection_id].create_index([("id", 1)], unique=True) - except CollectionInvalid as e: - # If there's an error creating the index, it might indicate an issue with the collection. - # This block can be customized based on specific needs or errors. - raise ConflictError( - f"Error ensuring collection '{collection_id}' exists: {e}" - ) + # Insert the new collection document into the collections collection + await collections_collection.insert_one(collection) except PyMongoError as e: - # Catch any other MongoDB error and raise a ConflictError for consistency with the original function's error handling. 
- raise ConflictError( - f"Error ensuring collection '{collection_id}' exists: {e}" - ) + # Catch any MongoDB error and raise an appropriate error + print(f"Failed to create collection {collection['id']}: {e}") + raise ConflictError(f"Failed to create collection {collection['id']}: {e}") - async def find_collection(self, collection_id: str) -> Collection: + async def find_collection(self, collection_id: str) -> dict: """ Find and return a collection from the database. @@ -824,21 +794,21 @@ async def find_collection(self, collection_id: str) -> Collection: collection_id (str): The ID of the collection to be found. Returns: - Collection: The found collection, represented as a `Collection` object. + dict: The found collection, represented as a dictionary. Raises: NotFoundError: If the collection with the given `collection_id` is not found in the database. """ - db = self.client[self.db_name] - collections_collection = db[ - COLLECTIONS_INDEX - ] # Assuming COLLECTIONS_INDEX is defined elsewhere + db = self.client[DATABASE] + collections_collection = db[COLLECTIONS_INDEX] try: collection = await collections_collection.find_one({"id": collection_id}) if not collection: raise NotFoundError(f"Collection {collection_id} not found") - return collection # Adjust this return according to how you want to use the Collection object + serialized_collection = serialize_doc(collection) + print("HELLO") + return serialized_collection except PyMongoError as e: # This is a general catch-all for MongoDB errors; adjust as needed for more specific handling print(f"Failed to find collection {collection_id}: {e}") @@ -859,10 +829,8 @@ async def update_collection( NotFoundError: If the collection with the specified ID does not exist. ConflictError: If attempting to change the collection ID to one that already exists. """ - db = self.client.get_database() # Assuming the database is set in the client - collections_collection = db[ - COLLECTIONS_INDEX - ] # Assuming COLLECTIONS_INDEX is defined elsewhere + db = self.client[DATABASE] + collections_collection = db[COLLECTIONS_INDEX] existing_collection = await self.find_collection(collection_id) if not existing_collection: @@ -902,7 +870,7 @@ async def delete_collection(self, collection_id: str): Args: collection_id (str): The ID of the collection to be deleted. """ - db = self.client.get_database() # Get the database + db = self.client[DATABASE] # Attempt to delete the collection document collection_result = await db["collections"].delete_one({"id": collection_id}) @@ -929,7 +897,7 @@ async def bulk_async( `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the index is refreshed after the bulk insert. The function does not return any value. """ - db = self.client.get_database() + db = self.client[DATABASE] items_collection = db["items"] # Prepare the documents for insertion @@ -958,7 +926,7 @@ def bulk_sync( completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the index is refreshed after the bulk insert. The function does not return any value. """ - db = self.sync_client.get_database() + db = self.sync_client[DATABASE] items_collection = db["items"] # Prepare the documents for insertion @@ -976,10 +944,8 @@ async def delete_items(self) -> None: Deletes all items from the 'items' collection in MongoDB. 
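A compact sketch of the unordered bulk-insert pattern the bulk helpers above rely on (connection string and documents are illustrative):

from pymongo import MongoClient
from pymongo.errors import BulkWriteError

def bulk_insert_sketch(docs: list) -> None:
    items = MongoClient("mongodb://root:example@localhost:27017")["stac"]["items"]
    try:
        # ordered=False keeps inserting past individual failures (e.g. duplicate
        # keys), much like raise_on_error=False in the Elasticsearch bulk helpers.
        items.insert_many(docs, ordered=False)
    except BulkWriteError as exc:
        # details["writeErrors"] lists the documents that were rejected.
        print(exc.details.get("writeErrors", []))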
""" - db = self.client.get_default_database() # or get_database('yourdbname') - items_collection = db[ - "items" - ] # Assuming 'items' is the name of your items collection + db = self.client[DATABASE] + items_collection = db["items"] try: await items_collection.delete_many({}) @@ -993,10 +959,8 @@ async def delete_collections(self) -> None: Deletes all collections from the 'collections' collection in MongoDB. """ - db = self.client.get_default_database() # or get_database('yourdbname') - collections_collection = db[ - "collections" - ] # Assuming 'collections' is the name of your collections collection + db = self.client[DATABASE] + collections_collection = db["collections"] try: await collections_collection.delete_many({}) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py new file mode 100644 index 00000000..612e8999 --- /dev/null +++ b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py @@ -0,0 +1,13 @@ +from bson import ObjectId + +def serialize_doc(doc): + """Recursively convert ObjectId to string in MongoDB documents.""" + if isinstance(doc, dict): + for k, v in doc.items(): + if isinstance(v, ObjectId): + doc[k] = str(v) # Convert ObjectId to string + elif isinstance(v, dict) or isinstance(v, list): + doc[k] = serialize_doc(v) # Recurse into sub-docs/lists + elif isinstance(doc, list): + doc = [serialize_doc(item) for item in doc] # Apply to each item in a list + return doc diff --git a/stac_fastapi/tests/conftest.py b/stac_fastapi/tests/conftest.py index 01160ee1..108d0a56 100644 --- a/stac_fastapi/tests/conftest.py +++ b/stac_fastapi/tests/conftest.py @@ -24,6 +24,10 @@ DatabaseLogic, create_collection_index, ) +elif os.getenv("BACKEND", "elasticsearch").lower() == "mongo": + from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSettings + from stac_fastapi.mongo.config import MongoDBSettings as SearchSettings + from stac_fastapi.mongo.database_logic import DatabaseLogic, create_collection_index else: from stac_fastapi.elasticsearch.config import ( ElasticsearchSettings as SearchSettings, diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 958d0703..341089fd 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -20,6 +20,8 @@ if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": from stac_fastapi.opensearch.database_logic import DatabaseLogic +elif os.getenv("BACKEND", "elasticsearch").lower() == "mongo": + from stac_fastapi.mongo.database_logic import DatabaseLogic else: from stac_fastapi.elasticsearch.database_logic import DatabaseLogic From 7787c17e27729791f6028bb0174196ef5a380676 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Wed, 21 Feb 2024 15:24:45 +0800 Subject: [PATCH 09/25] sort, bbox --- .../stac_fastapi/mongo/database_logic.py | 206 +++++++++--------- .../mongo/stac_fastapi/mongo/utilities.py | 63 ++++++ 2 files changed, 163 insertions(+), 106 deletions(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index d717a95a..c487b9ff 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -1,25 +1,23 @@ """Database logic.""" import base64 -import json import logging import os -from base64 import urlsafe_b64decode from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union import attr from bson import 
ObjectId -from pymongo.errors import ( # CollectionInvalid, - BulkWriteError, - DuplicateKeyError, - PyMongoError, -) +from pymongo.errors import BulkWriteError, PyMongoError from stac_fastapi.core import serializers from stac_fastapi.core.extensions import filter from stac_fastapi.core.utilities import bbox2polygon from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSearchSettings from stac_fastapi.mongo.config import MongoDBSettings as SyncSearchSettings -from stac_fastapi.mongo.utilities import serialize_doc +from stac_fastapi.mongo.utilities import ( + adapt_mongodb_docs_for_es_sorted, + decode_token, + serialize_doc, +) from stac_fastapi.types.errors import ConflictError, NotFoundError from stac_fastapi.types.stac import Collection, Item @@ -238,19 +236,16 @@ async def get_all_collections( token: Optional[str], limit: int, ) -> Iterable[Dict[str, Any]]: - """Retrieve a list of all collections from the database. + """ + Retrieve a list of all collections from the database, adapted to mimic Elasticsearch's document structure. Args: token (Optional[str]): The token used to return the next set of results. limit (int): Number of results to return Returns: - collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection. - - Notes: - The collections are retrieved from the Elasticsearch database using the `client.search` method, - with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records. - The result is a generator of dictionaries containing the source data for each collection. + collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection, + with each document nested under a '_source' key. """ db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] @@ -258,16 +253,16 @@ async def get_all_collections( query: Dict[str, Any] = {} if token: - # Assuming token is the last seen item ID; adjust based on your pagination strategy - last_seen_id = json.loads(urlsafe_b64decode(token.encode()).decode()) + last_seen_id = decode_token(token) query = {"id": {"$gt": last_seen_id}} cursor = collections_collection.find(query).sort("id", 1).limit(limit) - collections = [] - async for collection in cursor: - collections.append(collection) + collections = await cursor.to_list(length=limit) - return collections + # Adapt the MongoDB documents to mimic Elasticsearch's document structure + adapted_collections = adapt_mongodb_docs_for_es_sorted(collections) + + return adapted_collections async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """Retrieve a single item from the database. 
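As a sketch of how the opaque pagination token above translates into a Mongo filter — the token is just the base64-encoded id of the last collection seen:

from base64 import urlsafe_b64decode
from typing import Optional

def next_page_query(token: Optional[str]) -> dict:
    # No token means the first page; otherwise resume after the last-seen id.
    if token is None:
        return {}
    last_seen_id = urlsafe_b64decode(token.encode()).decode()
    return {"id": {"$gt": last_seen_id}}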
@@ -284,17 +279,19 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """ db = self.client[DATABASE] - collection_name = index_by_collection_id(collection_id) + collection = db[ITEMS_INDEX] try: # Attempt to find the item in the specified collection - item = await db[collection_name].find_one({"id": item_id}) + item = await collection.find_one({"id": item_id}) if not item: # If the item is not found, raise NotFoundError - raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" - ) - return item + raise NotFoundError(f"Item {item_id} does not exist.") + + # Serialize the MongoDB document to make it JSON serializable + serialized_item = serialize_doc(item) + + return serialized_item except Exception as e: # Log and re-raise any exceptions encountered during the operation logger.error( @@ -306,6 +303,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: def make_search(): """Database logic to create a Search instance.""" # return Search().sort(*DEFAULT_SORT) + print("make_search hello") return MongoSearchAdapter() @staticmethod @@ -360,7 +358,7 @@ def apply_bbox_filter(search: MongoSearchAdapter, bbox: List): a geo_shape filter is added to the search object, set to intersect with the specified polygon. """ geojson_polygon = {"type": "Polygon", "coordinates": bbox2polygon(*bbox)} - return search.add_filter( + search.add_filter( { "geometry": { "$geoIntersects": { @@ -369,6 +367,7 @@ def apply_bbox_filter(search: MongoSearchAdapter, bbox: List): } } ) + return search @staticmethod def apply_intersects_filter( @@ -387,9 +386,10 @@ def apply_intersects_filter( Notes: A geo_shape filter is added to the search object, set to intersect with the specified geometry. """ - return search.add_filter( + search = search.add_filter( {"geometry": {"$geoIntersects": {"$geometry": intersects}}} ) + return search @staticmethod def apply_stacql_filter( @@ -424,7 +424,8 @@ def apply_stacql_filter( raise ValueError(f"Unsupported operation '{op}'") # Add the constructed filter to the search adapter's filters - return search.add_filter(filter_condition) + search = search.add_filter(filter_condition) + return search @staticmethod def translate_clause_to_mongo(clause: filter.Clause) -> dict: @@ -491,24 +492,22 @@ def populate_sort(sortby: List[Dict[str, str]]) -> List[Tuple[str, int]]: Transform a list of sort criteria into the format expected by MongoDB. Args: - sortby (List[Dict[str, str]]): A list of dictionaries with 'field' and 'direction' keys, where - 'direction' can be 'asc' for ascending or 'desc' for descending. + sortby (List[SortExtension]): A list of SortExtension objects with 'field' + and 'direction' attributes. Returns: - List[Tuple[str, int]]: A list of tuples where each tuple is (fieldname, direction), with - direction being 1 for 'asc' and -1 for 'desc'. Returns an empty list - if no sort criteria are provided. + List[Tuple[str, int]]: A list of tuples where each tuple is (fieldname, direction), + with direction being 1 for 'asc' and -1 for 'desc'. + Returns an empty list if no sort criteria are provided. """ if not sortby: return [] - # MongoDB expects a list of tuples for sorting. Each tuple is (Field Name, Direction) - # where Direction is 1 for ascending and -1 for descending. mongo_sort = [] - for sort_field in sortby: - field = sort_field["field"] # The field name to sort by. - # Convert the direction to MongoDB's expected format. 
- direction = 1 if sort_field["direction"].lower() == "asc" else -1 + for sort_extension in sortby: + field = sort_extension.field + # Convert the direction enum to a string, then to MongoDB's expected format + direction = 1 if sort_extension.direction.value == "asc" else -1 mongo_sort.append((field, direction)) return mongo_sort @@ -544,7 +543,7 @@ async def execute_search( NotFoundError: If the collections specified in `collection_ids` do not exist. """ db = self.client[DATABASE] - collection = db["stac_items"] + collection = db[ITEMS_INDEX] query = {"$and": search.filters} if search.filters else {} if collection_ids: @@ -580,31 +579,58 @@ async def execute_search( async def check_collection_exists(self, collection_id: str): """ - Check if a specific collection exists in the MongoDB database. + Check if a specific STAC collection exists within the MongoDB database. - This method uses MongoDB's `list_collection_names` command with a filter - to efficiently determine if a collection with the specified name exists. - It is more efficient than retrieving all collection names and searching through - them, especially beneficial in databases with a large number of collections. + This method queries the MongoDB collection specified by COLLECTIONS_INDEX to determine + if a document with the specified collection_id exists. Args: - collection_id (str): The name of the collection to check for existence. + collection_id (str): The ID of the STAC collection to check for existence. Raises: - NotFoundError: If the collection specified by `collection_id` does not exist - in the database. - - Note: - The `NotFoundError` should be appropriately defined or imported in your - application to handle cases where the specified collection does not exist. + NotFoundError: If the STAC collection specified by `collection_id` does not exist + within the MongoDB collection defined by COLLECTIONS_INDEX. """ db = self.client[DATABASE] + collections_collection = db[COLLECTIONS_INDEX] - # Check for the collection's existence by filtering list_collection_names - collections = db.list_collection_names(filter={"name": collection_id}) - if not collections: + # Query the collections collection to see if a document with the specified collection_id exists + collection_exists = await collections_collection.find_one({"id": collection_id}) + if not collection_exists: raise NotFoundError(f"Collection {collection_id} does not exist") + async def create_item(self, item: Item, refresh: bool = False): + """ + Asynchronously inserts a STAC item into MongoDB, ensuring the item does not already exist. + + Args: + item (Item): The STAC item to be created. + refresh (bool, optional): Not used for MongoDB, kept for compatibility with Elasticsearch interface. + + Raises: + ConflictError: If the item with the same ID already exists within the collection. + NotFoundError: If the specified collection does not exist in MongoDB. 
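Stepping back to populate_sort above: a sketch of how its (field, direction) tuples feed straight into a Mongo cursor (the field names and connection string are illustrative):

from pymongo import MongoClient

def sorted_items_page() -> list:
    items = MongoClient("mongodb://root:example@localhost:27017")["stac"]["items"]
    # e.g. what populate_sort yields for a descending datetime, ascending id sort
    mongo_sort = [("properties.datetime", -1), ("id", 1)]
    return list(items.find({}).sort(mongo_sort).limit(10))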
+ """ + db = self.client[DATABASE] + items_collection = db[ITEMS_INDEX] + collections_collection = db[COLLECTIONS_INDEX] + + collection_exists = await collections_collection.count_documents( + {"id": item["collection"]}, limit=1 + ) + if not collection_exists: + raise NotFoundError(f"Collection {item['collection']} does not exist") + + new_item = item.copy() + new_item["_id"] = item.get("_id", ObjectId()) + + existing_item = await items_collection.find_one({"_id": new_item["_id"]}) + if existing_item: + raise ConflictError(f"Item with _id {item['_id']} already exists") + + await items_collection.insert_one(new_item) + item = serialize_doc(item) + async def prep_create_item( self, item: Item, base_url: str, exist_ok: bool = False ) -> Item: @@ -645,7 +671,7 @@ async def prep_create_item( ) # Return the transformed item ready for insertion - return mongo_item + return serialize_doc(mongo_item) def sync_prep_create_item( self, item: Item, base_url: str, exist_ok: bool = False @@ -667,7 +693,7 @@ def sync_prep_create_item( """ db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] - items_collection = db[index_by_collection_id(item.collection)] + items_collection = db[ITEMS_INDEX] # Check if the collection exists collection_exists = collections_collection.count_documents( @@ -689,40 +715,6 @@ def sync_prep_create_item( # Return the transformed item ready for insertion return mongo_item - async def create_item(self, item: Item, refresh: bool = False): - """ - Asynchronously inserts a STAC item into MongoDB, ensuring the item does not already exist. - - Args: - item (Item): The STAC item to be created. - refresh (bool, optional): Not used for MongoDB, kept for compatibility with Elasticsearch interface. - - Raises: - ConflictError: If the item with the same ID already exists within the collection. - NotFoundError: If the specified collection does not exist in MongoDB. - """ - db = self.client[DATABASE] - items_collection = db[ITEMS_INDEX] - - # Convert STAC Item to a dictionary, preserving all its fields - # item_dict = item.dict(by_alias=True) - - # Ensure the collection exists - collections_collection = db[COLLECTIONS_INDEX] - collection_exists = await collections_collection.count_documents( - {"id": item["collection"]}, limit=1 - ) - if collection_exists == 0: - raise NotFoundError(f"Collection {item['collection']} does not exist") - - # Attempt to insert the item, checking for duplicates - try: - await items_collection.insert_one(item) - except DuplicateKeyError: - raise ConflictError( - f"Item {item['id']} in collection {item['collection']} already exists" - ) - async def delete_item( self, item_id: str, collection_id: str, refresh: bool = False ): @@ -738,14 +730,11 @@ async def delete_item( NotFoundError: If the Item does not exist in the database. 
""" db = self.client[DATABASE] - collection_name = index_by_collection_id( - collection_id - ) # Derive the MongoDB collection name - collection = db[collection_name] + items_collection = db[ITEMS_INDEX] try: # Attempt to delete the item from the collection - result = await collection.delete_one({"id": item_id}) + result = await items_collection.delete_one({"id": item_id}) if result.deleted_count == 0: # If no items were deleted, it means the item did not exist raise NotFoundError( @@ -785,6 +774,8 @@ async def create_collection(self, collection: Collection, refresh: bool = False) print(f"Failed to create collection {collection['id']}: {e}") raise ConflictError(f"Failed to create collection {collection['id']}: {e}") + collection = serialize_doc(collection) + async def find_collection(self, collection_id: str) -> dict: """ Find and return a collection from the database. @@ -807,7 +798,6 @@ async def find_collection(self, collection_id: str) -> dict: if not collection: raise NotFoundError(f"Collection {collection_id} not found") serialized_collection = serialize_doc(collection) - print("HELLO") return serialized_collection except PyMongoError as e: # This is a general catch-all for MongoDB errors; adjust as needed for more specific handling @@ -847,7 +837,7 @@ async def update_collection( ) # Update the collection ID in all related documents/items - items_collection = db[ITEMS_INDEX_PREFIX + collection_id] + items_collection = db[ITEMS_INDEX] await items_collection.update_many( {}, {"$set": {"collection": collection["id"]}} ) @@ -871,14 +861,18 @@ async def delete_collection(self, collection_id: str): collection_id (str): The ID of the collection to be deleted. """ db = self.client[DATABASE] + collections_collection = db[COLLECTIONS_INDEX] + items_collection = db[ITEMS_INDEX] # Attempt to delete the collection document - collection_result = await db["collections"].delete_one({"id": collection_id}) + collection_result = await collections_collection.delete_one( + {"id": collection_id} + ) if collection_result.deleted_count == 0: raise NotFoundError(f"Collection {collection_id} not found") # Delete all items associated with the collection - await db["items"].delete_many({"collection": collection_id}) + await items_collection.delete_many({"collection": collection_id}) async def bulk_async( self, collection_id: str, processed_items: List[Item], refresh: bool = False @@ -898,7 +892,7 @@ async def bulk_async( index is refreshed after the bulk insert. The function does not return any value. """ db = self.client[DATABASE] - items_collection = db["items"] + items_collection = db[ITEMS_INDEX] # Prepare the documents for insertion documents = [item.dict(by_alias=True) for item in processed_items] @@ -927,7 +921,7 @@ def bulk_sync( True, the index is refreshed after the bulk insert. The function does not return any value. """ db = self.sync_client[DATABASE] - items_collection = db["items"] + items_collection = db[ITEMS_INDEX] # Prepare the documents for insertion documents = [item.dict(by_alias=True) for item in processed_items] @@ -945,7 +939,7 @@ async def delete_items(self) -> None: Deletes all items from the 'items' collection in MongoDB. """ db = self.client[DATABASE] - items_collection = db["items"] + items_collection = db[ITEMS_INDEX] try: await items_collection.delete_many({}) @@ -960,7 +954,7 @@ async def delete_collections(self) -> None: Deletes all collections from the 'collections' collection in MongoDB. 
""" db = self.client[DATABASE] - collections_collection = db["collections"] + collections_collection = db[COLLECTIONS_INDEX] try: await collections_collection.delete_many({}) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py index 612e8999..626ed87a 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py @@ -1,5 +1,11 @@ +"""utilities for stac-fastapi.mongo.""" + +from base64 import urlsafe_b64decode, urlsafe_b64encode +from typing import Any, Dict, Iterable + from bson import ObjectId + def serialize_doc(doc): """Recursively convert ObjectId to string in MongoDB documents.""" if isinstance(doc, dict): @@ -11,3 +17,60 @@ def serialize_doc(doc): elif isinstance(doc, list): doc = [serialize_doc(item) for item in doc] # Apply to each item in a list return doc + + +def adapt_mongodb_docs_for_es( + docs: Iterable[Dict[str, Any]] +) -> Iterable[Dict[str, Any]]: + """ + Adapts MongoDB documents to mimic Elasticsearch's document structure. + + Converts ObjectId instances to strings. + + Args: + docs (Iterable[Dict[str, Any]]): A list of dictionaries representing MongoDB documents. + + Returns: + Iterable[Dict[str, Any]]: A list of adapted dictionaries with each original document + nested under a '_source' key, and ObjectId instances converted to strings. + """ + adapted_docs = [{"_source": serialize_doc(doc)} for doc in docs] + return adapted_docs + + +def adapt_mongodb_docs_for_es_sorted( + docs: Iterable[Dict[str, Any]] +) -> Iterable[Dict[str, Any]]: + """ + Adapt MongoDB documents to mimic Elasticsearch's document structure. + + Args: + docs (Iterable[Dict[str, Any]]): The original MongoDB documents. + + Returns: + Iterable[Dict[str, Any]]: Adapted documents, each nested under a '_source' key. 
+ """ + adapted_docs = [] + for doc in docs: + # Optionally, remove MongoDB's '_id' field if not needed in the output + doc.pop("_id", None) + + adapted_doc = { + "_source": doc, + # Assuming 'id' is unique and can be used for sorting and pagination + "sort": [doc["id"]], + } + adapted_docs.append(serialize_doc(adapted_doc)) + return adapted_docs + + +def decode_token(encoded_token: str) -> str: + """Decode a base64 string back to its original token value.""" + token_value = urlsafe_b64decode(encoded_token.encode()).decode() + return token_value + + +def encode_token(token_value: str) -> str: + """Encode a token value (e.g., a UUID or cursor) as a base64 string.""" + encoded_token = urlsafe_b64encode(token_value.encode()).decode() + return encoded_token From 2a4f72e573a44de7c17633be55f9622288440ea0 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 23 Feb 2024 18:20:39 +0800 Subject: [PATCH 10/25] update all collections --- stac_fastapi/core/stac_fastapi/core/core.py | 66 ++++++------------- .../stac_fastapi/mongo/database_logic.py | 58 +++++++++------- .../mongo/stac_fastapi/mongo/utilities.py | 46 ------------- 3 files changed, 53 insertions(+), 117 deletions(-) diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 63c43944..b959b7a6 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -1,7 +1,6 @@ """Item crud client.""" import logging import re -from base64 import urlsafe_b64encode from datetime import datetime as datetime_type from datetime import timezone from typing import Any, Dict, List, Optional, Set, Type, Union @@ -193,66 +192,36 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: async def all_collections(self, **kwargs) -> Collections: """Read all collections from the database. - Returns: - Collections: A `Collections` object containing all the collections in the database and - links to various resources. + Args: + **kwargs: Keyword arguments from the request. - Raises: - Exception: If any error occurs while reading the collections from the database. + Returns: + A Collections object containing all the collections in the database and links to various resources. 
""" - request: Request = kwargs["request"] - base_url = str(kwargs["request"].base_url) + request = kwargs["request"] + base_url = str(request.base_url) + limit = int(request.query_params.get("limit", 10)) + token = request.query_params.get("token") - limit = ( - int(request.query_params["limit"]) - if "limit" in request.query_params - else 10 - ) - token = ( - request.query_params["token"] if "token" in request.query_params else None + collections, next_token = await self.database.get_all_collections( + token=token, limit=limit, base_url=base_url ) - hits = await self.database.get_all_collections(limit=limit, token=token) - - next_search_after = None - next_link = None - if len(hits) == limit: - last_hit = hits[-1] - next_search_after = last_hit["sort"] - next_token = urlsafe_b64encode( - ",".join(map(str, next_search_after)).encode() - ).decode() - paging_links = PagingLinks(next=next_token, request=request) - next_link = paging_links.link_next() - links = [ - { - "rel": Relations.root.value, - "type": MimeTypes.json, - "href": base_url, - }, - { - "rel": Relations.parent.value, - "type": MimeTypes.json, - "href": base_url, - }, + {"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url}, + {"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url}, { "rel": Relations.self.value, "type": MimeTypes.json, - "href": urljoin(base_url, "collections"), + "href": f"{base_url}collections", }, ] - if next_link: + if next_token: + next_link = PagingLinks(next=next_token, request=request).link_next() links.append(next_link) - return Collections( - collections=[ - self.collection_serializer.db_to_stac(c["_source"], base_url=base_url) - for c in hits - ], - links=links, - ) + return Collections(collections=collections, links=links) async def get_collection(self, collection_id: str, **kwargs) -> Collection: """Get a collection from the database by its id. 
@@ -565,9 +534,12 @@ async def post_search( search = self.database.apply_bbox_filter(search=search, bbox=bbox) if search_request.intersects: + print("INTERSECTS: HELLO") + print("SEARCH1: ", search) search = self.database.apply_intersects_filter( search=search, intersects=search_request.intersects ) + print("SEARCH2: ", search) if search_request.query: for field_name, expr in search_request.query.items(): diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index c487b9ff..83bfc03a 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -11,13 +11,10 @@ from stac_fastapi.core import serializers from stac_fastapi.core.extensions import filter from stac_fastapi.core.utilities import bbox2polygon +from stac_fastapi.extensions.core import SortExtension from stac_fastapi.mongo.config import AsyncMongoDBSettings as AsyncSearchSettings from stac_fastapi.mongo.config import MongoDBSettings as SyncSearchSettings -from stac_fastapi.mongo.utilities import ( - adapt_mongodb_docs_for_es_sorted, - decode_token, - serialize_doc, -) +from stac_fastapi.mongo.utilities import decode_token, encode_token, serialize_doc from stac_fastapi.types.errors import ConflictError, NotFoundError from stac_fastapi.types.stac import Collection, Item @@ -232,37 +229,47 @@ class DatabaseLogic: """CORE LOGIC""" async def get_all_collections( - self, - token: Optional[str], - limit: int, - ) -> Iterable[Dict[str, Any]]: + self, token: Optional[str], limit: int, base_url: str + ) -> Tuple[List[Dict[str, Any]], Optional[str]]: """ - Retrieve a list of all collections from the database, adapted to mimic Elasticsearch's document structure. + Retrieve a list of all collections from the MongoDB database, supporting pagination. Args: - token (Optional[str]): The token used to return the next set of results. - limit (int): Number of results to return + token (Optional[str]): The pagination token, which is the ID of the last collection in the previous page. + limit (int): The maximum number of results to return. + base_url (str): The base URL for constructing fully qualified links. Returns: - collections (Iterable[Dict[str, Any]]): A list of dictionaries containing the source data for each collection, - with each document nested under a '_source' key. + Tuple[List[Dict[str, Any]], Optional[str]]: A tuple containing a list of collections + and an optional next token for pagination. 
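A minimal sketch of the look-ahead pagination this method implements, assuming the encode_token/decode_token helpers from utilities; in this version the token is taken from the last item actually returned:

from stac_fastapi.mongo.utilities import decode_token, encode_token

async def page_of_collections(db, limit: int, token=None):
    query = {"id": {"$gt": decode_token(token)}} if token else {}
    # Fetch one extra document purely to learn whether a next page exists.
    docs = await db["collections"].find(query).sort("id", 1).to_list(length=limit + 1)
    if len(docs) > limit:
        docs = docs[:limit]
        return docs, encode_token(docs[-1]["id"])
    return docs, None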
""" db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] query: Dict[str, Any] = {} - if token: last_seen_id = decode_token(token) query = {"id": {"$gt": last_seen_id}} - cursor = collections_collection.find(query).sort("id", 1).limit(limit) - collections = await cursor.to_list(length=limit) + cursor = collections_collection.find(query).sort("id", 1).limit(limit + 1) + collections = await cursor.to_list(length=limit + 1) + + # Check if we have more items to paginate through + next_token = None + if len(collections) > limit: + # Use the ID of the last item in the list as the next token + next_token = encode_token(collections[-1]["id"]) + collections = collections[ + :-1 + ] # Remove the extra item used to check for next page - # Adapt the MongoDB documents to mimic Elasticsearch's document structure - adapted_collections = adapt_mongodb_docs_for_es_sorted(collections) + # Serialize MongoDB documents to STAC-compliant collections + serialized_collections = [ + self.collection_serializer.db_to_stac(serialize_doc(collection), base_url) + for collection in collections + ] - return adapted_collections + return serialized_collections, next_token async def get_one_item(self, collection_id: str, item_id: str) -> Dict: """Retrieve a single item from the database. @@ -386,9 +393,10 @@ def apply_intersects_filter( Notes: A geo_shape filter is added to the search object, set to intersect with the specified geometry. """ - search = search.add_filter( - {"geometry": {"$geoIntersects": {"$geometry": intersects}}} - ) + print("intersect: ", search) + print("intersects geometry: ", intersects) + search.add_filter({"geometry": {"$geoIntersects": {"$geometry": intersects}}}) + print("intersect: ", search) return search @staticmethod @@ -487,7 +495,7 @@ def apply_cql2_filter( return search_adapter @staticmethod - def populate_sort(sortby: List[Dict[str, str]]) -> List[Tuple[str, int]]: + def populate_sort(sortby: List[SortExtension]) -> List[Tuple[str, int]]: """ Transform a list of sort criteria into the format expected by MongoDB. @@ -544,6 +552,8 @@ async def execute_search( """ db = self.client[DATABASE] collection = db[ITEMS_INDEX] + print("Search: ", search) + print("Filters: ", search.filters) query = {"$and": search.filters} if search.filters else {} if collection_ids: diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py index 626ed87a..1c862440 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/utilities.py @@ -1,7 +1,6 @@ """utilities for stac-fastapi.mongo.""" from base64 import urlsafe_b64decode, urlsafe_b64encode -from typing import Any, Dict, Iterable from bson import ObjectId @@ -19,51 +18,6 @@ def serialize_doc(doc): return doc -def adapt_mongodb_docs_for_es( - docs: Iterable[Dict[str, Any]] -) -> Iterable[Dict[str, Any]]: - """ - Adapts MongoDB documents to mimic Elasticsearch's document structure. - - Converts ObjectId instances to strings. - - Args: - docs (Iterable[Dict[str, Any]]): A list of dictionaries representing MongoDB documents. - - Returns: - Iterable[Dict[str, Any]]: A list of adapted dictionaries with each original document - nested under a '_source' key, and ObjectId instances converted to strings. 
- """ - adapted_docs = [{"_source": serialize_doc(doc)} for doc in docs] - return adapted_docs - - -def adapt_mongodb_docs_for_es_sorted( - docs: Iterable[Dict[str, Any]] -) -> Iterable[Dict[str, Any]]: - """ - Adapt MongoDB documents to mimic Elasticsearch's document structure. - - Args: - docs (Iterable[Dict[str, Any]]): The original MongoDB documents. - - Returns: - Iterable[Dict[str, Any]]: Adapted documents, each nested under a '_source' key. - """ - adapted_docs = [] - for doc in docs: - # Optionally, remove MongoDB's '_id' field if not needed in the output - doc.pop("_id", None) - - adapted_doc = { - "_source": doc, - # Assuming 'id' is unique and can be used for sorting and pagination - "sort": [doc["id"]], - } - adapted_docs.append(serialize_doc(adapted_doc)) - return adapted_docs - - def decode_token(encoded_token: str) -> str: """Decode a base64 string back to its original token value.""" token_value = urlsafe_b64decode(encoded_token.encode()).decode() From f905d35d29a58284cd5ecfc950d349e95d466950 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 23 Feb 2024 18:59:37 +0800 Subject: [PATCH 11/25] fix intersects filter --- stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index 83bfc03a..e9cea00e 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -393,10 +393,10 @@ def apply_intersects_filter( Notes: A geo_shape filter is added to the search object, set to intersect with the specified geometry. """ - print("intersect: ", search) - print("intersects geometry: ", intersects) - search.add_filter({"geometry": {"$geoIntersects": {"$geometry": intersects}}}) - print("intersect: ", search) + geometry_dict = {"type": intersects.type, "coordinates": intersects.coordinates} + search.add_filter( + {"geometry": {"$geoIntersects": {"$geometry": geometry_dict}}} + ) return search @staticmethod From 0b2d16014cac4c63bef5859c91b11f2b67504401 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Fri, 23 Feb 2024 19:32:01 +0800 Subject: [PATCH 12/25] fix sort filter --- .../stac_fastapi/mongo/database_logic.py | 250 +++++++++--------- 1 file changed, 123 insertions(+), 127 deletions(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index e9cea00e..e5941073 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -24,117 +24,117 @@ COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") ITEMS_INDEX = os.getenv("STAC_ITEMS_INDEX", "items") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") +# ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") DATABASE = os.getenv("MONGO_DB", "admin") -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. 
- """ - unsupported_chars = set('/\\ ."*<>:|?$') - sanitized = "".join(c for c in collection_id if c not in unsupported_chars) - return f"{ITEMS_INDEX_PREFIX}{sanitized.lower()}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. - """ - if collection_ids is None: - return COLLECTIONS_INDEX - else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) - - -async def create_collection_index(): - """ - Ensure indexes for the collections collection in MongoDB using the asynchronous client. - - Returns: - None - """ - client = AsyncSearchSettings().create_client - if client: - try: - db = client[DATABASE] - await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) - print("Index created successfully.") - except Exception as e: - print(f"An error occurred while creating the index: {e}") - finally: - print(f"Closing client: {client}") - client.close() - else: - print("Failed to create MongoDB client.") - - -async def create_item_index(collection_id: str): - """ - Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. - - Args: - collection_id (str): Collection identifier used to derive the MongoDB collection name for items. - - Returns: - None - """ - client = AsyncSearchSettings.create_client - db = client[DATABASE] - - # Derive the collection name for items based on the collection_id - collection_name = index_by_collection_id(collection_id) - - try: - await db[collection_name].create_index([("properties.datetime", -1)]) - await db[collection_name].create_index([("id", 1)], unique=True) - await db[collection_name].create_index([("geometry", "2dsphere")]) - print(f"Indexes created successfully for collection: {collection_name}.") - except Exception as e: - # Handle exceptions, which could be due to existing index conflicts, etc. - print( - f"An error occurred while creating indexes for collection {collection_name}: {e}" - ) - finally: - await client.close() - - -async def delete_item_index(collection_id: str): - """ - Drop the MongoDB collection corresponding to the specified collection ID. - - This operation is the MongoDB equivalent of deleting an Elasticsearch index, removing both the data and - the structure for the specified collection's items. - - Args: - collection_id (str): The ID of the collection whose associated MongoDB collection will be dropped. - """ - client = AsyncSearchSettings.create_client - db = client[DATABASE] - - # Derive the MongoDB collection name using the collection ID - collection_name = index_by_collection_id(collection_id) - - try: - # Drop the collection, removing both its data and structure - await db[collection_name].drop() - logger.info(f"Collection '{collection_name}' successfully dropped.") - except Exception as e: - logger.error(f"Error dropping collection '{collection_name}': {e}") - finally: - await client.close() +# def index_by_collection_id(collection_id: str) -> str: +# """ +# Translate a collection id into an Elasticsearch index name. + +# Args: +# collection_id (str): The collection id to translate into an index name. + +# Returns: +# str: The index name derived from the collection id. 
+# """ +# unsupported_chars = set('/\\ ."*<>:|?$') +# sanitized = "".join(c for c in collection_id if c not in unsupported_chars) +# return f"{ITEMS_INDEX_PREFIX}{sanitized.lower()}" + + +# def indices(collection_ids: Optional[List[str]]) -> str: +# """ +# Get a comma-separated string of index names for a given list of collection ids. + +# Args: +# collection_ids: A list of collection ids. + +# Returns: +# A string of comma-separated index names. If `collection_ids` is None, returns the default indices. +# """ +# if collection_ids is None: +# return COLLECTIONS_INDEX +# else: +# return ",".join([index_by_collection_id(c) for c in collection_ids]) + + +# async def create_collection_index(): +# """ +# Ensure indexes for the collections collection in MongoDB using the asynchronous client. + +# Returns: +# None +# """ +# client = AsyncSearchSettings().create_client +# if client: +# try: +# db = client[DATABASE] +# await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) +# print("Index created successfully.") +# except Exception as e: +# print(f"An error occurred while creating the index: {e}") +# finally: +# print(f"Closing client: {client}") +# client.close() +# else: +# print("Failed to create MongoDB client.") + + +# async def create_item_index(collection_id: str): +# """ +# Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. + +# Args: +# collection_id (str): Collection identifier used to derive the MongoDB collection name for items. + +# Returns: +# None +# """ +# client = AsyncSearchSettings.create_client +# db = client[DATABASE] + +# # Derive the collection name for items based on the collection_id +# collection_name = index_by_collection_id(collection_id) + +# try: +# await db[collection_name].create_index([("properties.datetime", -1)]) +# await db[collection_name].create_index([("id", 1)], unique=True) +# await db[collection_name].create_index([("geometry", "2dsphere")]) +# print(f"Indexes created successfully for collection: {collection_name}.") +# except Exception as e: +# # Handle exceptions, which could be due to existing index conflicts, etc. +# print( +# f"An error occurred while creating indexes for collection {collection_name}: {e}" +# ) +# finally: +# await client.close() + + +# async def delete_item_index(collection_id: str): +# """ +# Drop the MongoDB collection corresponding to the specified collection ID. + +# This operation is the MongoDB equivalent of deleting an Elasticsearch index, removing both the data and +# the structure for the specified collection's items. + +# Args: +# collection_id (str): The ID of the collection whose associated MongoDB collection will be dropped. +# """ +# client = AsyncSearchSettings.create_client +# db = client[DATABASE] + +# # Derive the MongoDB collection name using the collection ID +# collection_name = index_by_collection_id(collection_id) + +# try: +# # Drop the collection, removing both its data and structure +# await db[collection_name].drop() +# logger.info(f"Collection '{collection_name}' successfully dropped.") +# except Exception as e: +# logger.error(f"Error dropping collection '{collection_name}': {e}") +# finally: +# await client.close() def mk_item_id(item_id: str, collection_id: str): @@ -179,9 +179,7 @@ def __init__(self): are retrieved first. 
""" self.filters = [] - # MongoDB uses a list of tuples for sorting: [('field1', direction), ('field2', direction)] - # Convert the DEFAULT_SORT dict to this format, considering MongoDB's sorting capabilities - self.sort = [("properties.datetime", -1), ("id", -1), ("collection", -1)] + # self.sort = [("properties.datetime", -1), ("id", -1), ("collection", -1)] def add_filter(self, filter_condition): """ @@ -195,21 +193,21 @@ def add_filter(self, filter_condition): """ self.filters.append(filter_condition) - def set_sort(self, sort_conditions): - """ - Set the sorting criteria for the query based on provided conditions. + # def set_sort(self, sort_conditions): + # """ + # Set the sorting criteria for the query based on provided conditions. - This method translates a dictionary of field names and sort directions (asc or desc) into MongoDB's - format for sorting queries. It overwrites any existing sort criteria with the new criteria provided. + # This method translates a dictionary of field names and sort directions (asc or desc) into MongoDB's + # format for sorting queries. It overwrites any existing sort criteria with the new criteria provided. - Args: - sort_conditions (dict): A dictionary where keys are field names and values are dictionaries - indicating sort direction ('asc' for ascending or 'desc' for descending). - """ - self.sort = [] - for field, details in sort_conditions.items(): - direction = 1 if details["order"] == "asc" else -1 - self.sort.append((field, direction)) + # Args: + # sort_conditions (dict): A dictionary where keys are field names and values are dictionaries + # indicating sort direction ('asc' for ascending or 'desc' for descending). + # """ + # self.sort = [] + # for field, details in sort_conditions.items(): + # direction = 1 if details["order"] == "asc" else -1 + # self.sort.append((field, direction)) @attr.s @@ -552,15 +550,13 @@ async def execute_search( """ db = self.client[DATABASE] collection = db[ITEMS_INDEX] - print("Search: ", search) print("Filters: ", search.filters) query = {"$and": search.filters} if search.filters else {} if collection_ids: query["collection"] = {"$in": collection_ids} - sort_criteria = search.sort if search.sort else [("_id", 1)] # Default sort - + sort_criteria = sort if sort else [("_id", 1)] # Default sort try: if token: last_id = ObjectId(base64.urlsafe_b64decode(token.encode()).decode()) From 491e39f98536b148ef81805197ea4650d26b4d8b Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 24 Feb 2024 22:59:09 +0800 Subject: [PATCH 13/25] translate cql2 json --- stac_fastapi/mongo/stac_fastapi/mongo/app.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/app.py b/stac_fastapi/mongo/stac_fastapi/mongo/app.py index 298148df..59823985 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/app.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/app.py @@ -20,7 +20,11 @@ ) from stac_fastapi.extensions.third_party import BulkTransactionExtension from stac_fastapi.mongo.config import AsyncMongoDBSettings -from stac_fastapi.mongo.database_logic import DatabaseLogic, create_collection_index +from stac_fastapi.mongo.database_logic import ( + DatabaseLogic, + create_collection_index, + create_item_index, +) settings = AsyncMongoDBSettings() session = Session.create_from_settings(settings) @@ -71,6 +75,7 @@ @app.on_event("startup") async def _startup_event() -> None: await create_collection_index() + await create_item_index() def run() -> None: From 7ab38877b9c5fc467575c68e8ad6360b76acf42c 
Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 24 Feb 2024 22:59:19 +0800 Subject: [PATCH 14/25] create item index --- .../stac_fastapi/mongo/database_logic.py | 410 +++++++++--------- 1 file changed, 206 insertions(+), 204 deletions(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index e5941073..6d6100ab 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -24,117 +24,61 @@ COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") ITEMS_INDEX = os.getenv("STAC_ITEMS_INDEX", "items") -# ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") DATABASE = os.getenv("MONGO_DB", "admin") -# def index_by_collection_id(collection_id: str) -> str: -# """ -# Translate a collection id into an Elasticsearch index name. - -# Args: -# collection_id (str): The collection id to translate into an index name. - -# Returns: -# str: The index name derived from the collection id. -# """ -# unsupported_chars = set('/\\ ."*<>:|?$') -# sanitized = "".join(c for c in collection_id if c not in unsupported_chars) -# return f"{ITEMS_INDEX_PREFIX}{sanitized.lower()}" - - -# def indices(collection_ids: Optional[List[str]]) -> str: -# """ -# Get a comma-separated string of index names for a given list of collection ids. - -# Args: -# collection_ids: A list of collection ids. - -# Returns: -# A string of comma-separated index names. If `collection_ids` is None, returns the default indices. -# """ -# if collection_ids is None: -# return COLLECTIONS_INDEX -# else: -# return ",".join([index_by_collection_id(c) for c in collection_ids]) - - -# async def create_collection_index(): -# """ -# Ensure indexes for the collections collection in MongoDB using the asynchronous client. - -# Returns: -# None -# """ -# client = AsyncSearchSettings().create_client -# if client: -# try: -# db = client[DATABASE] -# await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) -# print("Index created successfully.") -# except Exception as e: -# print(f"An error occurred while creating the index: {e}") -# finally: -# print(f"Closing client: {client}") -# client.close() -# else: -# print("Failed to create MongoDB client.") - - -# async def create_item_index(collection_id: str): -# """ -# Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. - -# Args: -# collection_id (str): Collection identifier used to derive the MongoDB collection name for items. - -# Returns: -# None -# """ -# client = AsyncSearchSettings.create_client -# db = client[DATABASE] - -# # Derive the collection name for items based on the collection_id -# collection_name = index_by_collection_id(collection_id) - -# try: -# await db[collection_name].create_index([("properties.datetime", -1)]) -# await db[collection_name].create_index([("id", 1)], unique=True) -# await db[collection_name].create_index([("geometry", "2dsphere")]) -# print(f"Indexes created successfully for collection: {collection_name}.") -# except Exception as e: -# # Handle exceptions, which could be due to existing index conflicts, etc. -# print( -# f"An error occurred while creating indexes for collection {collection_name}: {e}" -# ) -# finally: -# await client.close() - - -# async def delete_item_index(collection_id: str): -# """ -# Drop the MongoDB collection corresponding to the specified collection ID. 
- -# This operation is the MongoDB equivalent of deleting an Elasticsearch index, removing both the data and -# the structure for the specified collection's items. - -# Args: -# collection_id (str): The ID of the collection whose associated MongoDB collection will be dropped. -# """ -# client = AsyncSearchSettings.create_client -# db = client[DATABASE] - -# # Derive the MongoDB collection name using the collection ID -# collection_name = index_by_collection_id(collection_id) - -# try: -# # Drop the collection, removing both its data and structure -# await db[collection_name].drop() -# logger.info(f"Collection '{collection_name}' successfully dropped.") -# except Exception as e: -# logger.error(f"Error dropping collection '{collection_name}': {e}") -# finally: -# await client.close() +async def create_collection_index(): + """ + Ensure indexes for the collections collection in MongoDB using the asynchronous client. + + Returns: + None + """ + client = AsyncSearchSettings().create_client + if client: + try: + db = client[DATABASE] + await db[COLLECTIONS_INDEX].create_index([("id", 1)], unique=True) + print(f"Index created successfully for collection: {COLLECTIONS_INDEX}.") + except Exception as e: + # Handle exceptions, which could be due to existing index conflicts, etc. + print( + f"An error occurred while creating indexe for collection {COLLECTIONS_INDEX}: {e}" + ) + finally: + print(f"Closing client: {client}") + client.close() + else: + print("Failed to create MongoDB client.") + + +async def create_item_index(): + """ + Ensure indexes for a specific collection of items in MongoDB using the asynchronous client. + + Args: + collection_id (str): Collection identifier used to derive the MongoDB collection name for items. + + Returns: + None + """ + client = AsyncSearchSettings().create_client + + if client: + db = client[DATABASE] + collection = db[ITEMS_INDEX] + try: + await collection.create_index([("properties.datetime", -1)]) + await collection.create_index([("id", 1)], unique=True) + await collection.create_index([("geometry", "2dsphere")]) + print(f"Indexes created successfully for collection: {ITEMS_INDEX}.") + except Exception as e: + # Handle exceptions, which could be due to existing index conflicts, etc. + print( + f"An error occurred while creating indexes for collection {ITEMS_INDEX}: {e}" + ) + finally: + await client.close() def mk_item_id(item_id: str, collection_id: str): @@ -193,22 +137,6 @@ def add_filter(self, filter_condition): """ self.filters.append(filter_condition) - # def set_sort(self, sort_conditions): - # """ - # Set the sorting criteria for the query based on provided conditions. - - # This method translates a dictionary of field names and sort directions (asc or desc) into MongoDB's - # format for sorting queries. It overwrites any existing sort criteria with the new criteria provided. - - # Args: - # sort_conditions (dict): A dictionary where keys are field names and values are dictionaries - # indicating sort direction ('asc' for ascending or 'desc' for descending). - # """ - # self.sort = [] - # for field, details in sort_conditions.items(): - # direction = 1 if details["order"] == "asc" else -1 - # self.sort.append((field, direction)) - @attr.s class DatabaseLogic: @@ -233,7 +161,7 @@ async def get_all_collections( Retrieve a list of all collections from the MongoDB database, supporting pagination. Args: - token (Optional[str]): The pagination token, which is the ID of the last collection in the previous page. 
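[Editorial note] The rewritten bootstrap above drops the per-collection index helpers and creates all item indexes on the single shared `items` collection. A minimal runnable version under the same defaults (database `admin`, collection `items`); the 2dsphere index is a prerequisite for the `$geoIntersects` queries used elsewhere in this series:

```python
# Standalone sketch of the item-index bootstrap against a local MongoDB.
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient


async def ensure_item_indexes() -> None:
    collection = AsyncIOMotorClient("mongodb://localhost:27017")["admin"]["items"]
    # Descending datetime supports the default sort; unique "id" guards
    # against duplicates; 2dsphere enables $geoIntersects.
    await collection.create_index([("properties.datetime", -1)])
    await collection.create_index([("id", 1)], unique=True)
    await collection.create_index([("geometry", "2dsphere")])


asyncio.run(ensure_item_indexes())
```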
+ token (Optional[str]): The pagination token, which is the ID of the last collection seen. limit (int): The maximum number of results to return. base_url (str): The base URL for constructing fully qualified links. @@ -247,26 +175,27 @@ async def get_all_collections( query: Dict[str, Any] = {} if token: last_seen_id = decode_token(token) + print(f"Decoded token (Last seen ID): {last_seen_id}") query = {"id": {"$gt": last_seen_id}} - cursor = collections_collection.find(query).sort("id", 1).limit(limit + 1) - collections = await cursor.to_list(length=limit + 1) + print(f"Query: {query}, Limit: {limit}") + cursor = collections_collection.find(query).sort("id", 1).limit(limit) + collections = await cursor.to_list(length=limit) - # Check if we have more items to paginate through next_token = None - if len(collections) > limit: - # Use the ID of the last item in the list as the next token + if len(collections) == limit: + # Assumes collections are sorted by 'id' in ascending order. next_token = encode_token(collections[-1]["id"]) - collections = collections[ - :-1 - ] # Remove the extra item used to check for next page + print(f"Next token (for next page): {next_token}") - # Serialize MongoDB documents to STAC-compliant collections serialized_collections = [ self.collection_serializer.db_to_stac(serialize_doc(collection), base_url) for collection in collections ] + print( + f"Serialized Collections: {serialized_collections}, Next Token: {next_token}" + ) return serialized_collections, next_token async def get_one_item(self, collection_id: str, item_id: str) -> Dict: @@ -281,28 +210,21 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: Raises: NotFoundError: If the specified Item does not exist in the Collection. - """ db = self.client[DATABASE] collection = db[ITEMS_INDEX] - try: - # Attempt to find the item in the specified collection - item = await collection.find_one({"id": item_id}) - if not item: - # If the item is not found, raise NotFoundError - raise NotFoundError(f"Item {item_id} does not exist.") - - # Serialize the MongoDB document to make it JSON serializable - serialized_item = serialize_doc(item) - - return serialized_item - except Exception as e: - # Log and re-raise any exceptions encountered during the operation - logger.error( - f"An error occurred while retrieving item {item_id} from collection {collection_id}: {e}" + # Adjusted to include collection_id in the query to fetch items within a specific collection + item = await collection.find_one({"id": item_id, "collection": collection_id}) + if not item: + # If the item is not found, raise NotFoundError + raise NotFoundError( + f"Item {item_id} in collection {collection_id} does not exist." ) - raise + + # Serialize the MongoDB document to make it JSON serializable + serialized_item = serialize_doc(item) + return serialized_item @staticmethod def make_search(): @@ -422,6 +344,9 @@ def apply_stacql_filter( "lte": "$lte", } + # Replace double underscores with dots for nested field queries + field = field.replace("__", ".") + # Construct the MongoDB filter if op in op_mapping: mongo_op = op_mapping[op] @@ -430,66 +355,124 @@ def apply_stacql_filter( raise ValueError(f"Unsupported operation '{op}'") # Add the constructed filter to the search adapter's filters - search = search.add_filter(filter_condition) + search.add_filter(filter_condition) return search @staticmethod - def translate_clause_to_mongo(clause: filter.Clause) -> dict: - """Translate a CQL2 Clause object to a MongoDB query. 
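[Editorial note] The STACQL hunk above maps double-underscore field names to dotted Mongo paths before building a comparison filter. An abridged sketch of that mapping (operator table shortened, field name illustrative):

```python
# Double-underscore fields become dotted paths; ops map to Mongo operators.
OP_MAPPING = {"gt": "$gt", "gte": "$gte", "lt": "$lt", "lte": "$lte"}


def stacql_condition(op: str, field: str, value):
    field = field.replace("__", ".")  # nested-field convention
    if op not in OP_MAPPING:
        raise ValueError(f"Unsupported operation '{op}'")
    return {field: {OP_MAPPING[op]: value}}


print(stacql_condition("lte", "properties__eo:cloud_cover", 10))
# {'properties.eo:cloud_cover': {'$lte': 10}}
```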
+ def translate_cql2_to_mongo(cql2_filter: Dict[str, Any]) -> Dict[str, Any]: + """ + Translate a CQL2 filter dictionary to a MongoDB query. + + This function translates a CQL2 JSON filter into a MongoDB query format. It supports + various comparison operators, logical operators, and a special handling for spatial + intersections and the 'in' operator. Args: - clause (Clause): The Clause object to translate. + cql2_filter: A dictionary representing the CQL2 filter. Returns: - dict: The translated MongoDB query. - """ - # This function needs to recursively translate CQL2 Clauses to MongoDB queries - # Here we demonstrate a simple example of handling an "eq" operator - if clause.op == filter.ComparisonOp.eq: - # Direct translation of an "eq" operation to MongoDB's query syntax - return {clause.args[0].property: {"$eq": clause.args[1]}} - elif clause.op == filter.SpatialIntersectsOp.s_intersects: - # Example of handling a spatial intersects operation + A MongoDB query as a dictionary. + """ + print("CQL2 filter:", cql2_filter) + op_mapping = { + ">": "$gt", + ">=": "$gte", + "<": "$lt", + "<=": "$lte", + "=": "$eq", + "!=": "$ne", + "like": "$regex", + "in": "$in", + } + + if cql2_filter["op"] in ["and", "or"]: + mongo_op = f"${cql2_filter['op']}" return { - clause.args[0].property: { - "$geoIntersects": { - "$geometry": clause.args[ - 1 - ].__geo_interface__ # Assuming args[1] is a GeoJSON-pydantic model - } - } + mongo_op: [ + DatabaseLogic.translate_cql2_to_mongo(arg) + for arg in cql2_filter["args"] + ] } - # Add additional elif blocks to handle other operators like "lt", "lte", "gt", "gte", "neq", etc. - else: - raise NotImplementedError( - f"Operator {clause.op} not implemented for MongoDB translation." + + elif cql2_filter["op"] == "not": + translated_condition = DatabaseLogic.translate_cql2_to_mongo( + cql2_filter["args"][0] ) + return {"$nor": [translated_condition]} + + elif cql2_filter["op"] == "s_intersects": + geometry = cql2_filter["args"][1] + return {"geometry": {"$geoIntersects": {"$geometry": geometry}}} + + elif cql2_filter["op"] == "between": + property_path = "properties." + cql2_filter["args"][0]["property"] + lower_bound = cql2_filter["args"][1] + upper_bound = cql2_filter["args"][2] + return {property_path: {"$gte": lower_bound, "$lte": upper_bound}} + + else: + property_name = cql2_filter["args"][0]["property"] + # Check if the property name is in the special mapping + if property_name in filter.queryables_mapping: + property_path = filter.queryables_mapping[property_name] + elif property_name not in [ + "id", + "collection", + ] and not property_name.startswith("properties."): + property_path = f"properties.{property_name}" + else: + property_path = property_name + + value = cql2_filter["args"][1] + # Attempt to convert numeric string to float or integer + try: + if "." in value: + value = float(value) + else: + value = int(value) + except (ValueError, TypeError): + pass # Keep value as is if conversion is not possible + mongo_op = op_mapping.get(cql2_filter["op"]) + + print("VALUE", type(value)) + + if mongo_op is None: + raise ValueError( + f"Unsupported operation '{cql2_filter['op']}' in CQL2 filter." 
+ ) + + if mongo_op == "$regex": + return { + property_path: {mongo_op: value.replace("%", ".*"), "$options": "i"} + } + elif mongo_op == "$in": + if not isinstance(value, list): + raise ValueError(f"Arg {value} is not a list") + return {property_path: {mongo_op: value}} + else: + return {property_path: {mongo_op: value}} @staticmethod def apply_cql2_filter( - search_adapter: MongoSearchAdapter, _filter: Optional[filter.Clause] + search_adapter: "MongoSearchAdapter", _filter: Optional[Dict[str, Any]] ): - """Adapt database logic to apply a CQL2 filter for MongoDB search endpoint. + """ + Apply a CQL2 JSON filter to the MongoDB search adapter. + + This method translates a CQL2 JSON filter into MongoDB's query syntax and adds it to the adapter's filters. Args: - search_adapter (MongoSearchAdapter): The search adapter to which the filter will be applied. - _filter (Optional[Clause]): A Clause representing the filter criteria. + search_adapter (MongoSearchAdapter): The MongoDB search adapter to which the filter will be applied. + _filter (Optional[Dict[str, Any]]): The CQL2 filter as a dictionary. If None, no action is taken. Returns: - MongoSearchAdapter: The search adapter with the filter applied. + MongoSearchAdapter: The search adapter with the CQL2 filter applied. """ - if _filter is None: - return search_adapter - - # Translating the CQL2 Clause to a MongoDB query - try: - # Assuming _filter is a Clause object as defined above - mongo_query = DatabaseLogic.translate_clause_to_mongo(_filter) + if _filter is not None: + mongo_query = DatabaseLogic.translate_cql2_to_mongo(_filter) search_adapter.add_filter(mongo_query) - except Exception as e: - # Handle translation errors or unsupported features - print(f"Error translating CQL2 Clause to MongoDB query: {e}") + print("search adapter: ", search_adapter) return search_adapter @staticmethod @@ -550,9 +533,10 @@ async def execute_search( """ db = self.client[DATABASE] collection = db[ITEMS_INDEX] - print("Filters: ", search.filters) - query = {"$and": search.filters} if search.filters else {} + query = {"$and": search.filters} if search and search.filters else {} + + print("Query: ", query) if collection_ids: query["collection"] = {"$in": collection_ids} @@ -703,14 +687,14 @@ def sync_prep_create_item( # Check if the collection exists collection_exists = collections_collection.count_documents( - {"id": item.collection}, limit=1 + {"id": item["collection"]}, limit=1 ) if not collection_exists: - raise NotFoundError(f"Collection {item.collection} does not exist") + raise NotFoundError(f"Collection {item['collection']} does not exist") # Transform item using item_serializer for MongoDB compatibility mongo_item = self.item_serializer.stac_to_db(item, base_url) - + print("mongo item id: ", mongo_item["id"]) if not exist_ok: existing_item = items_collection.find_one({"id": mongo_item["id"]}) if existing_item: @@ -719,7 +703,7 @@ def sync_prep_create_item( ) # Return the transformed item ready for insertion - return mongo_item + return serialize_doc(mongo_item) async def delete_item( self, item_id: str, collection_id: str, refresh: bool = False @@ -824,16 +808,22 @@ async def update_collection( Raises: NotFoundError: If the collection with the specified ID does not exist. ConflictError: If attempting to change the collection ID to one that already exists. + + Note: + This function handles both updating a collection's metadata and changing its ID. + It does not directly modify the `_id` field, which is immutable in MongoDB. 
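[Editorial note] For reference, a sample CQL2-JSON input and the Mongo query shape the translator above is expected to produce. This is illustrative only: the exact property paths depend on `filter.queryables_mapping`, and the values here are invented.

```python
# Example CQL2-JSON filter and its expected MongoDB translation (shape only).
cql2 = {
    "op": "and",
    "args": [
        {"op": "<=", "args": [{"property": "eo:cloud_cover"}, 10]},
        {
            "op": "s_intersects",
            "args": [
                {"property": "geometry"},
                {"type": "Point", "coordinates": [5.0, 5.0]},
            ],
        },
    ],
}

expected = {
    "$and": [
        {"properties.eo:cloud_cover": {"$lte": 10}},
        {
            "geometry": {
                "$geoIntersects": {
                    "$geometry": {"type": "Point", "coordinates": [5.0, 5.0]}
                }
            }
        },
    ]
}
print(expected)
```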
+ When changing a collection's ID, it creates a new document with the new ID and deletes the old document. """ db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] + # Ensure the existing collection exists existing_collection = await self.find_collection(collection_id) if not existing_collection: raise NotFoundError(f"Collection {collection_id} not found") + # Handle changing collection ID if collection_id != collection["id"]: - # Check if the new ID already exists new_id_exists = await collections_collection.find_one( {"id": collection["id"]} ) @@ -842,29 +832,40 @@ async def update_collection( f"Collection with ID {collection['id']} already exists" ) - # Update the collection ID in all related documents/items items_collection = db[ITEMS_INDEX] + # Update only items related to the old collection ID to the new collection ID await items_collection.update_many( - {}, {"$set": {"collection": collection["id"]}} + {"collection": collection_id}, + {"$set": {"collection": collection["id"]}}, ) # Insert the new collection and delete the old one await collections_collection.insert_one(collection) await collections_collection.delete_one({"id": collection_id}) - - # Optionally, handle renaming or moving documents to a new collection if necessary else: - # Update the existing collection with new data + # Update the existing collection with new data, ensuring not to attempt to update `_id` await collections_collection.update_one( - {"id": collection_id}, {"$set": collection} + {"id": collection_id}, + {"$set": {k: v for k, v in collection.items() if k != "_id"}}, ) async def delete_collection(self, collection_id: str): """ Delete a collection from the MongoDB database and all items associated with it. + This function first attempts to delete the specified collection from the database. + If the collection exists and is successfully deleted, it then proceeds to delete + all items that are associated with this collection. If the collection does not exist, + a NotFoundError is raised to indicate the collection cannot be found in the database. + Args: collection_id (str): The ID of the collection to be deleted. + + Raises: + NotFoundError: If the collection with the specified ID does not exist in the database. + + This ensures that when a collection is deleted, all of its items are also cleaned up from the database, + maintaining data integrity and avoiding orphaned items without a parent collection. 
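[Editorial note] The ID-change path above runs three separate operations (update the items, insert the new collection document, delete the old one), so a crash mid-way can leave the data half-renamed. A sketch of wrapping the same steps in a Motor transaction; this assumes a replica set or mongos, since standalone servers do not support transactions, and the database/collection names are the series' defaults:

```python
# Transactional variant of the three-step collection rename (sketch).
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient


async def rename_collection(old_id: str, new_doc: dict) -> None:
    client = AsyncIOMotorClient("mongodb://localhost:27017")
    db = client["admin"]
    async with await client.start_session() as session:
        async with session.start_transaction():
            await db["items"].update_many(
                {"collection": old_id},
                {"$set": {"collection": new_doc["id"]}},
                session=session,
            )
            await db["collections"].insert_one(new_doc, session=session)
            await db["collections"].delete_one({"id": old_id}, session=session)


if __name__ == "__main__":  # transactions require a replica set / mongos
    asyncio.run(rename_collection("old-id", {"id": "new-id"}))
```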
""" db = self.client[DATABASE] collections_collection = db[COLLECTIONS_INDEX] @@ -875,9 +876,10 @@ async def delete_collection(self, collection_id: str): {"id": collection_id} ) if collection_result.deleted_count == 0: + # Collection not found, raise an error raise NotFoundError(f"Collection {collection_id} not found") - # Delete all items associated with the collection + # Successfully found and deleted the collection, now delete its items await items_collection.delete_many({"collection": collection_id}) async def bulk_async( From 8c1cae507c98ce749113c45eebbb9c8576cc49ae Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 25 Feb 2024 14:08:19 +0800 Subject: [PATCH 15/25] fields test --- .../stac_fastapi/mongo/database_logic.py | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index 6d6100ab..71a59c39 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -2,6 +2,7 @@ import base64 import logging import os +import re from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union import attr @@ -229,14 +230,12 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: @staticmethod def make_search(): """Database logic to create a Search instance.""" - # return Search().sort(*DEFAULT_SORT) - print("make_search hello") return MongoSearchAdapter() @staticmethod def apply_ids_filter(search: MongoSearchAdapter, item_ids: List[str]): """Database logic to search a list of STAC item ids.""" - search.add_filter({"_id": {"$in": item_ids}}) + search.add_filter({"id": {"$in": item_ids}}) return search @staticmethod @@ -405,7 +404,19 @@ def translate_cql2_to_mongo(cql2_filter: Dict[str, Any]) -> Dict[str, Any]: return {"geometry": {"$geoIntersects": {"$geometry": geometry}}} elif cql2_filter["op"] == "between": - property_path = "properties." + cql2_filter["args"][0]["property"] + property_name = cql2_filter["args"][0]["property"] + + # Use the special mapping directly if available, or construct the path appropriately + if property_name in filter.queryables_mapping: + property_path = filter.queryables_mapping[property_name] + elif property_name not in [ + "id", + "collection", + ] and not property_name.startswith("properties."): + property_path = f"properties.{property_name}" + else: + property_path = property_name + lower_bound = cql2_filter["args"][1] upper_bound = cql2_filter["args"][2] return {property_path: {"$gte": lower_bound, "$lte": upper_bound}} @@ -442,9 +453,22 @@ def translate_cql2_to_mongo(cql2_filter: Dict[str, Any]) -> Dict[str, Any]: ) if mongo_op == "$regex": - return { - property_path: {mongo_op: value.replace("%", ".*"), "$options": "i"} - } + # Replace SQL LIKE wildcards with regex equivalents, handling escaped characters + regex_pattern = re.sub( + r"(? 
Date: Sat, 2 Mar 2024 23:48:16 +0800 Subject: [PATCH 16/25] remove es os 1 --- dockerfiles/Dockerfile.deploy.es | 20 - dockerfiles/Dockerfile.deploy.os | 20 - dockerfiles/Dockerfile.dev.es | 19 - dockerfiles/Dockerfile.dev.os | 19 - elasticsearch/config/elasticsearch.yml | 18 - opensearch/config/opensearch.yml | 19 - stac_fastapi/core/README.md | 1 - stac_fastapi/core/setup.cfg | 2 - stac_fastapi/core/setup.py | 44 - .../core/stac_fastapi/core/__init__.py | 1 - .../stac_fastapi/core/base_database_logic.py | 54 - .../core/stac_fastapi/core/base_settings.py | 12 - stac_fastapi/core/stac_fastapi/core/core.py | 953 ------------------ .../core/stac_fastapi/core/datetime_utils.py | 14 - .../stac_fastapi/core/extensions/__init__.py | 5 - .../stac_fastapi/core/extensions/filter.py | 267 ----- .../stac_fastapi/core/extensions/query.py | 81 -- .../core/stac_fastapi/core/models/__init__.py | 1 - .../core/stac_fastapi/core/models/links.py | 138 --- .../core/stac_fastapi/core/models/search.py | 1 - .../core/stac_fastapi/core/serializers.py | 156 --- .../core/stac_fastapi/core/session.py | 25 - .../core/stac_fastapi/core/types/core.py | 306 ------ .../core/stac_fastapi/core/utilities.py | 21 - .../core/stac_fastapi/core/version.py | 2 - stac_fastapi/elasticsearch/README.md | 3 - stac_fastapi/elasticsearch/pytest.ini | 4 - stac_fastapi/elasticsearch/setup.cfg | 2 - stac_fastapi/elasticsearch/setup.py | 58 -- .../stac_fastapi/elasticsearch/__init__.py | 1 - .../stac_fastapi/elasticsearch/app.py | 109 -- .../stac_fastapi/elasticsearch/config.py | 80 -- .../elasticsearch/database_logic.py | 894 ---------------- .../stac_fastapi/elasticsearch/version.py | 2 - stac_fastapi/opensearch/README.md | 1 - stac_fastapi/opensearch/pytest.ini | 4 - stac_fastapi/opensearch/setup.cfg | 2 - stac_fastapi/opensearch/setup.py | 55 - .../stac_fastapi/opensearch/__init__.py | 1 - .../opensearch/stac_fastapi/opensearch/app.py | 109 -- .../stac_fastapi/opensearch/config.py | 81 -- .../stac_fastapi/opensearch/database_logic.py | 922 ----------------- .../stac_fastapi/opensearch/version.py | 2 - 43 files changed, 4529 deletions(-) delete mode 100644 dockerfiles/Dockerfile.deploy.es delete mode 100644 dockerfiles/Dockerfile.deploy.os delete mode 100644 dockerfiles/Dockerfile.dev.es delete mode 100644 dockerfiles/Dockerfile.dev.os delete mode 100644 elasticsearch/config/elasticsearch.yml delete mode 100644 opensearch/config/opensearch.yml delete mode 100644 stac_fastapi/core/README.md delete mode 100644 stac_fastapi/core/setup.cfg delete mode 100644 stac_fastapi/core/setup.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/__init__.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/base_database_logic.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/base_settings.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/core.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/datetime_utils.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/__init__.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/filter.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/extensions/query.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/models/__init__.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/models/links.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/models/search.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/serializers.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/session.py delete mode 100644 
stac_fastapi/core/stac_fastapi/core/types/core.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/utilities.py delete mode 100644 stac_fastapi/core/stac_fastapi/core/version.py delete mode 100644 stac_fastapi/elasticsearch/README.md delete mode 100644 stac_fastapi/elasticsearch/pytest.ini delete mode 100644 stac_fastapi/elasticsearch/setup.cfg delete mode 100644 stac_fastapi/elasticsearch/setup.py delete mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py delete mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py delete mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py delete mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py delete mode 100644 stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py delete mode 100644 stac_fastapi/opensearch/README.md delete mode 100644 stac_fastapi/opensearch/pytest.ini delete mode 100644 stac_fastapi/opensearch/setup.cfg delete mode 100644 stac_fastapi/opensearch/setup.py delete mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py delete mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/app.py delete mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/config.py delete mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py delete mode 100644 stac_fastapi/opensearch/stac_fastapi/opensearch/version.py diff --git a/dockerfiles/Dockerfile.deploy.es b/dockerfiles/Dockerfile.deploy.es deleted file mode 100644 index 2eab7b9d..00000000 --- a/dockerfiles/Dockerfile.deploy.es +++ /dev/null @@ -1,20 +0,0 @@ -FROM python:3.10-slim - -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install gcc && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -WORKDIR /app - -COPY . /app - -RUN pip install --no-cache-dir -e ./stac_fastapi/core -RUN pip install --no-cache-dir ./stac_fastapi/elasticsearch[server] - -EXPOSE 8080 - -CMD ["uvicorn", "stac_fastapi.elasticsearch.app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/dockerfiles/Dockerfile.deploy.os b/dockerfiles/Dockerfile.deploy.os deleted file mode 100644 index 035b181e..00000000 --- a/dockerfiles/Dockerfile.deploy.os +++ /dev/null @@ -1,20 +0,0 @@ -FROM python:3.10-slim - -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install gcc && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -WORKDIR /app - -COPY . /app - -RUN pip install --no-cache-dir -e ./stac_fastapi/core -RUN pip install --no-cache-dir ./stac_fastapi/opensearch[server] - -EXPOSE 8080 - -CMD ["uvicorn", "stac_fastapi.opensearch.app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/dockerfiles/Dockerfile.dev.es b/dockerfiles/Dockerfile.dev.es deleted file mode 100644 index a4248d39..00000000 --- a/dockerfiles/Dockerfile.dev.es +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.10-slim - - -# update apt pkgs, and install build-essential for ciso8601 -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get install -y build-essential && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# update certs used by Requests -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -WORKDIR /app - -COPY . 
/app - -RUN pip install --no-cache-dir -e ./stac_fastapi/core -RUN pip install --no-cache-dir -e ./stac_fastapi/elasticsearch[dev,server] diff --git a/dockerfiles/Dockerfile.dev.os b/dockerfiles/Dockerfile.dev.os deleted file mode 100644 index d9dc8b0a..00000000 --- a/dockerfiles/Dockerfile.dev.os +++ /dev/null @@ -1,19 +0,0 @@ -FROM python:3.10-slim - - -# update apt pkgs, and install build-essential for ciso8601 -RUN apt-get update && \ - apt-get -y upgrade && \ - apt-get install -y build-essential && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# update certs used by Requests -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -WORKDIR /app - -COPY . /app - -RUN pip install --no-cache-dir -e ./stac_fastapi/core -RUN pip install --no-cache-dir -e ./stac_fastapi/opensearch[dev,server] diff --git a/elasticsearch/config/elasticsearch.yml b/elasticsearch/config/elasticsearch.yml deleted file mode 100644 index 0bf5b680..00000000 --- a/elasticsearch/config/elasticsearch.yml +++ /dev/null @@ -1,18 +0,0 @@ -## Cluster Settings -cluster.name: stac-cluster -node.name: es01 -network.host: 0.0.0.0 -transport.host: 0.0.0.0 -discovery.type: single-node -http.port: 9200 - -path: - repo: - - /usr/share/elasticsearch/snapshots - -## License -xpack.license.self_generated.type: basic - -# Security -xpack.security.enabled: false -xpack.security.transport.ssl.enabled: false \ No newline at end of file diff --git a/opensearch/config/opensearch.yml b/opensearch/config/opensearch.yml deleted file mode 100644 index 5e44b259..00000000 --- a/opensearch/config/opensearch.yml +++ /dev/null @@ -1,19 +0,0 @@ -## Cluster Settings -cluster.name: stac-cluster -node.name: os01 -network.host: 0.0.0.0 -transport.host: 0.0.0.0 -discovery.type: single-node -http.port: 9202 -http.cors.enabled: true -http.cors.allow-headers: X-Requested-With,Content-Type,Content-Length,Accept,Authorization - -path: - repo: - - /usr/share/opensearch/snapshots - -# Security -plugins.security.disabled: true -plugins.security.ssl.http.enabled: true - -node.max_local_storage_nodes: 3 diff --git a/stac_fastapi/core/README.md b/stac_fastapi/core/README.md deleted file mode 100644 index 02f4e35a..00000000 --- a/stac_fastapi/core/README.md +++ /dev/null @@ -1 +0,0 @@ -# stac-fastapi core library for Elasticsearch and Opensearch backends \ No newline at end of file diff --git a/stac_fastapi/core/setup.cfg b/stac_fastapi/core/setup.cfg deleted file mode 100644 index 1eb3fa49..00000000 --- a/stac_fastapi/core/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -version = attr: stac_fastapi.core.version.__version__ diff --git a/stac_fastapi/core/setup.py b/stac_fastapi/core/setup.py deleted file mode 100644 index 68ba8f70..00000000 --- a/stac_fastapi/core/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -"""stac_fastapi: core elasticsearch/ opensearch module.""" - -from setuptools import find_namespace_packages, setup - -with open("README.md") as f: - desc = f.read() - -install_requires = [ - "fastapi", - "attrs", - "pydantic[dotenv]<2", - "stac_pydantic==2.0.*", - "stac-fastapi.types==2.4.9", - "stac-fastapi.api==2.4.9", - "stac-fastapi.extensions==2.4.9", - "pystac[validation]", - "orjson", - "overrides", - "geojson-pydantic", - "pygeofilter==0.2.1", -] - -setup( - name="stac-fastapi.core", - description="Core library for the Elasticsearch and Opensearch stac-fastapi backends.", - long_description=desc, - long_description_content_type="text/markdown", - python_requires=">=3.8", - classifiers=[ - "Intended Audience :: Developers", - "Intended 
Audience :: Information Technology", - "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: MIT License", - ], - url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", - license="MIT", - packages=find_namespace_packages(), - zip_safe=False, - install_requires=install_requires, -) diff --git a/stac_fastapi/core/stac_fastapi/core/__init__.py b/stac_fastapi/core/stac_fastapi/core/__init__.py deleted file mode 100644 index 32b338eb..00000000 --- a/stac_fastapi/core/stac_fastapi/core/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Core library.""" diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py deleted file mode 100644 index 0043cfb8..00000000 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Base database logic.""" - -import abc -from typing import Any, Dict, Iterable, Optional - - -class BaseDatabaseLogic(abc.ABC): - """ - Abstract base class for database logic. - - This class defines the basic structure and operations for database interactions. - Subclasses must provide implementations for these methods. - """ - - @abc.abstractmethod - async def get_all_collections( - self, token: Optional[str], limit: int - ) -> Iterable[Dict[str, Any]]: - """Retrieve a list of all collections from the database.""" - pass - - @abc.abstractmethod - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: - """Retrieve a single item from the database.""" - pass - - @abc.abstractmethod - async def create_item(self, item: Dict, refresh: bool = False) -> None: - """Create an item in the database.""" - pass - - @abc.abstractmethod - async def delete_item( - self, item_id: str, collection_id: str, refresh: bool = False - ) -> None: - """Delete an item from the database.""" - pass - - @abc.abstractmethod - async def create_collection(self, collection: Dict, refresh: bool = False) -> None: - """Create a collection in the database.""" - pass - - @abc.abstractmethod - async def find_collection(self, collection_id: str) -> Dict: - """Find a collection in the database.""" - pass - - @abc.abstractmethod - async def delete_collection( - self, collection_id: str, refresh: bool = False - ) -> None: - """Delete a collection from the database.""" - pass diff --git a/stac_fastapi/core/stac_fastapi/core/base_settings.py b/stac_fastapi/core/stac_fastapi/core/base_settings.py deleted file mode 100644 index f30d07a4..00000000 --- a/stac_fastapi/core/stac_fastapi/core/base_settings.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Base settings.""" - -from abc import ABC, abstractmethod - - -class ApiBaseSettings(ABC): - """Abstract base class for API settings.""" - - @abstractmethod - def create_client(self): - """Create a database client.""" - pass diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py deleted file mode 100644 index a12adeef..00000000 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ /dev/null @@ -1,953 +0,0 @@ -"""Item crud client.""" -import logging -import re -from datetime import datetime as datetime_type -from datetime import timezone -from typing import Any, Dict, List, Optional, Set, Type, Union -from urllib.parse import unquote_plus, urljoin - -import attr -import orjson -import stac_pydantic -from fastapi import 
HTTPException, Request -from overrides import overrides -from pydantic import ValidationError -from pygeofilter.backends.cql2_json import to_cql2 -from pygeofilter.parsers.cql2_text import parse as parse_cql2_text -from stac_pydantic.links import Relations -from stac_pydantic.shared import MimeTypes -from stac_pydantic.version import STAC_VERSION - -from stac_fastapi.core.base_database_logic import BaseDatabaseLogic -from stac_fastapi.core.base_settings import ApiBaseSettings -from stac_fastapi.core.models.links import PagingLinks -from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.session import Session -from stac_fastapi.core.types.core import ( - AsyncBaseCoreClient, - AsyncBaseFiltersClient, - AsyncBaseTransactionsClient, -) -from stac_fastapi.extensions.third_party.bulk_transactions import ( - BaseBulkTransactionsClient, - BulkTransactionMethod, - Items, -) -from stac_fastapi.types import stac as stac_types -from stac_fastapi.types.config import Settings -from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES -from stac_fastapi.types.extension import ApiExtension -from stac_fastapi.types.links import CollectionLinks -from stac_fastapi.types.requests import get_base_url -from stac_fastapi.types.search import BaseSearchPostRequest -from stac_fastapi.types.stac import Collection, Collections, Item, ItemCollection - -logger = logging.getLogger(__name__) - -NumType = Union[float, int] - - -@attr.s -class CoreClient(AsyncBaseCoreClient): - """Client for core endpoints defined by the STAC specification. - - This class is a implementation of `AsyncBaseCoreClient` that implements the core endpoints - defined by the STAC specification. It uses the `DatabaseLogic` class to interact with the - database, and `ItemSerializer` and `CollectionSerializer` to convert between STAC objects and - database records. - - Attributes: - session (Session): A requests session instance to be used for all HTTP requests. - item_serializer (Type[serializers.ItemSerializer]): A serializer class to be used to convert - between STAC items and database records. - collection_serializer (Type[serializers.CollectionSerializer]): A serializer class to be - used to convert between STAC collections and database records. - database (DatabaseLogic): An instance of the `DatabaseLogic` class that is used to interact - with the database. 
- """ - - database: BaseDatabaseLogic = attr.ib() - base_conformance_classes: List[str] = attr.ib( - factory=lambda: BASE_CONFORMANCE_CLASSES - ) - extensions: List[ApiExtension] = attr.ib(default=attr.Factory(list)) - - session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) - item_serializer: Type[ItemSerializer] = attr.ib(default=ItemSerializer) - collection_serializer: Type[CollectionSerializer] = attr.ib( - default=CollectionSerializer - ) - post_request_model = attr.ib(default=BaseSearchPostRequest) - stac_version: str = attr.ib(default=STAC_VERSION) - landing_page_id: str = attr.ib(default="stac-fastapi") - title: str = attr.ib(default="stac-fastapi") - description: str = attr.ib(default="stac-fastapi") - - def _landing_page( - self, - base_url: str, - conformance_classes: List[str], - extension_schemas: List[str], - ) -> stac_types.LandingPage: - landing_page = stac_types.LandingPage( - type="Catalog", - id=self.landing_page_id, - title=self.title, - description=self.description, - stac_version=self.stac_version, - conformsTo=conformance_classes, - links=[ - { - "rel": Relations.self.value, - "type": MimeTypes.json, - "href": base_url, - }, - { - "rel": Relations.root.value, - "type": MimeTypes.json, - "href": base_url, - }, - { - "rel": "data", - "type": MimeTypes.json, - "href": urljoin(base_url, "collections"), - }, - { - "rel": Relations.conformance.value, - "type": MimeTypes.json, - "title": "STAC/WFS3 conformance classes implemented by this server", - "href": urljoin(base_url, "conformance"), - }, - { - "rel": Relations.search.value, - "type": MimeTypes.geojson, - "title": "STAC search", - "href": urljoin(base_url, "search"), - "method": "GET", - }, - { - "rel": Relations.search.value, - "type": MimeTypes.geojson, - "title": "STAC search", - "href": urljoin(base_url, "search"), - "method": "POST", - }, - ], - stac_extensions=extension_schemas, - ) - return landing_page - - async def landing_page(self, **kwargs) -> stac_types.LandingPage: - """Landing page. - - Called with `GET /`. - - Returns: - API landing page, serving as an entry point to the API. - """ - request: Request = kwargs["request"] - base_url = get_base_url(request) - landing_page = self._landing_page( - base_url=base_url, - conformance_classes=self.conformance_classes(), - extension_schemas=[], - ) - collections = await self.all_collections(request=kwargs["request"]) - for collection in collections["collections"]: - landing_page["links"].append( - { - "rel": Relations.child.value, - "type": MimeTypes.json.value, - "title": collection.get("title") or collection.get("id"), - "href": urljoin(base_url, f"collections/{collection['id']}"), - } - ) - - # Add OpenAPI URL - landing_page["links"].append( - { - "rel": "service-desc", - "type": "application/vnd.oai.openapi+json;version=3.0", - "title": "OpenAPI service description", - "href": urljoin( - str(request.base_url), request.app.openapi_url.lstrip("/") - ), - } - ) - - # Add human readable service-doc - landing_page["links"].append( - { - "rel": "service-doc", - "type": "text/html", - "title": "OpenAPI service documentation", - "href": urljoin( - str(request.base_url), request.app.docs_url.lstrip("/") - ), - } - ) - - return landing_page - - async def all_collections(self, **kwargs) -> Collections: - """Read all collections from the database. - - Args: - **kwargs: Keyword arguments from the request. - - Returns: - A Collections object containing all the collections in the database and links to various resources. 
- """ - request = kwargs["request"] - base_url = str(request.base_url) - limit = int(request.query_params.get("limit", 10)) - token = request.query_params.get("token") - - collections, next_token = await self.database.get_all_collections( - token=token, limit=limit, base_url=base_url - ) - - links = [ - {"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url}, - {"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url}, - { - "rel": Relations.self.value, - "type": MimeTypes.json, - "href": f"{base_url}collections", - }, - ] - - if next_token: - next_link = PagingLinks(next=next_token, request=request).link_next() - links.append(next_link) - - return Collections(collections=collections, links=links) - - async def get_collection(self, collection_id: str, **kwargs) -> Collection: - """Get a collection from the database by its id. - - Args: - collection_id (str): The id of the collection to retrieve. - kwargs: Additional keyword arguments passed to the API call. - - Returns: - Collection: A `Collection` object representing the requested collection. - - Raises: - NotFoundError: If the collection with the given id cannot be found in the database. - """ - base_url = str(kwargs["request"].base_url) - collection = await self.database.find_collection(collection_id=collection_id) - return self.collection_serializer.db_to_stac( - collection=collection, base_url=base_url - ) - - async def item_collection( - self, - collection_id: str, - bbox: Optional[List[NumType]] = None, - datetime: Union[str, datetime_type, None] = None, - limit: int = 10, - token: str = None, - **kwargs, - ) -> ItemCollection: - """Read items from a specific collection in the database. - - Args: - collection_id (str): The identifier of the collection to read items from. - bbox (Optional[List[NumType]]): The bounding box to filter items by. - datetime (Union[str, datetime_type, None]): The datetime range to filter items by. - limit (int): The maximum number of items to return. The default value is 10. - token (str): A token used for pagination. - request (Request): The incoming request. - - Returns: - ItemCollection: An `ItemCollection` object containing the items from the specified collection that meet - the filter criteria and links to various resources. - - Raises: - HTTPException: If the specified collection is not found. - Exception: If any error occurs while reading the items from the database. 
- """ - request: Request = kwargs["request"] - base_url = str(request.base_url) - - collection = await self.get_collection( - collection_id=collection_id, request=request - ) - collection_id = collection.get("id") - if collection_id is None: - raise HTTPException(status_code=404, detail="Collection not found") - - search = self.database.make_search() - search = self.database.apply_collections_filter( - search=search, collection_ids=[collection_id] - ) - - if datetime: - datetime_search = self._return_date(datetime) - search = self.database.apply_datetime_filter( - search=search, datetime_search=datetime_search - ) - - if bbox: - bbox = [float(x) for x in bbox] - if len(bbox) == 6: - bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] - - search = self.database.apply_bbox_filter(search=search, bbox=bbox) - - items, maybe_count, next_token = await self.database.execute_search( - search=search, - limit=limit, - sort=None, - token=token, # type: ignore - collection_ids=[collection_id], - ) - - items = [ - self.item_serializer.db_to_stac(item, base_url=base_url) for item in items - ] - - context_obj = None - if self.extension_is_enabled("ContextExtension"): - context_obj = { - "returned": len(items), - "limit": limit, - } - if maybe_count is not None: - context_obj["matched"] = maybe_count - - links = [] - if next_token: - links = await PagingLinks(request=request, next=next_token).get_links() - - return ItemCollection( - type="FeatureCollection", - features=items, - links=links, - context=context_obj, - ) - - async def get_item(self, item_id: str, collection_id: str, **kwargs) -> Item: - """Get an item from the database based on its id and collection id. - - Args: - collection_id (str): The ID of the collection the item belongs to. - item_id (str): The ID of the item to be retrieved. - - Returns: - Item: An `Item` object representing the requested item. - - Raises: - Exception: If any error occurs while getting the item from the database. - NotFoundError: If the item does not exist in the specified collection. - """ - base_url = str(kwargs["request"].base_url) - item = await self.database.get_one_item( - item_id=item_id, collection_id=collection_id - ) - return self.item_serializer.db_to_stac(item, base_url) - - @staticmethod - def _return_date(interval_str): - """ - Convert a date interval string into a dictionary for filtering search results. - - The date interval string should be formatted as either a single date or a range of dates separated - by "/". The date format should be ISO-8601 (YYYY-MM-DDTHH:MM:SSZ). If the interval string is a - single date, it will be converted to a dictionary with a single "eq" key whose value is the date in - the ISO-8601 format. If the interval string is a range of dates, it will be converted to a - dictionary with "gte" (greater than or equal to) and "lte" (less than or equal to) keys. If the - interval string is a range of dates with ".." instead of "/", the start and end dates will be - assigned default values to encompass the entire possible date range. - - Args: - interval_str (str): The date interval string to be converted. - - Returns: - dict: A dictionary representing the date interval for use in filtering search results. - """ - intervals = interval_str.split("/") - if len(intervals) == 1: - datetime = f"{intervals[0][0:19]}Z" - return {"eq": datetime} - else: - start_date = intervals[0] - end_date = intervals[1] - if ".." 
not in intervals: - start_date = f"{start_date[0:19]}Z" - end_date = f"{end_date[0:19]}Z" - elif start_date != "..": - start_date = f"{start_date[0:19]}Z" - end_date = "2200-12-01T12:31:12Z" - elif end_date != "..": - start_date = "1900-10-01T00:00:00Z" - end_date = f"{end_date[0:19]}Z" - else: - start_date = "1900-10-01T00:00:00Z" - end_date = "2200-12-01T12:31:12Z" - - return {"lte": end_date, "gte": start_date} - - async def get_search( - self, - request: Request, - collections: Optional[List[str]] = None, - ids: Optional[List[str]] = None, - bbox: Optional[List[NumType]] = None, - datetime: Optional[Union[str, datetime_type]] = None, - limit: Optional[int] = 10, - query: Optional[str] = None, - token: Optional[str] = None, - fields: Optional[List[str]] = None, - sortby: Optional[str] = None, - intersects: Optional[str] = None, - filter: Optional[str] = None, - filter_lang: Optional[str] = None, - **kwargs, - ) -> ItemCollection: - """Get search results from the database. - - Args: - collections (Optional[List[str]]): List of collection IDs to search in. - ids (Optional[List[str]]): List of item IDs to search for. - bbox (Optional[List[NumType]]): Bounding box to search in. - datetime (Optional[Union[str, datetime_type]]): Filter items based on the datetime field. - limit (Optional[int]): Maximum number of results to return. - query (Optional[str]): Query string to filter the results. - token (Optional[str]): Access token to use when searching the catalog. - fields (Optional[List[str]]): Fields to include or exclude from the results. - sortby (Optional[str]): Sorting options for the results. - intersects (Optional[str]): GeoJSON geometry to search in. - kwargs: Additional parameters to be passed to the API. - - Returns: - ItemCollection: Collection of `Item` objects representing the search results. - - Raises: - HTTPException: If any error occurs while searching the catalog. 
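To make the interval branching in `_return_date` easier to follow, here is a condensed, behavior-equivalent sketch with worked examples; the 1900/2200 sentinel dates are the same open-range defaults used above.

```python
def return_date(interval_str: str) -> dict:
    """Condensed restatement of _return_date above (illustrative only)."""
    intervals = interval_str.split("/")
    if len(intervals) == 1:
        return {"eq": f"{intervals[0][0:19]}Z"}
    start, end = intervals
    start = "1900-10-01T00:00:00Z" if start == ".." else f"{start[0:19]}Z"
    end = "2200-12-01T12:31:12Z" if end == ".." else f"{end[0:19]}Z"
    return {"gte": start, "lte": end}


assert return_date("2020-02-01T00:00:00Z") == {"eq": "2020-02-01T00:00:00Z"}
assert return_date("2020-02-01T00:00:00Z/..") == {
    "gte": "2020-02-01T00:00:00Z",
    "lte": "2200-12-01T12:31:12Z",
}
```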
- """ - base_args = { - "collections": collections, - "ids": ids, - "bbox": bbox, - "limit": limit, - "token": token, - "query": orjson.loads(query) if query else query, - } - - # this is borrowed from stac-fastapi-pgstac - # Kludgy fix because using factory does not allow alias for filter-lan - query_params = str(request.query_params) - if filter_lang is None: - match = re.search(r"filter-lang=([a-z0-9-]+)", query_params, re.IGNORECASE) - if match: - filter_lang = match.group(1) - - if datetime: - base_args["datetime"] = datetime - - if intersects: - base_args["intersects"] = orjson.loads(unquote_plus(intersects)) - - if sortby: - sort_param = [] - for sort in sortby: - sort_param.append( - { - "field": sort[1:], - "direction": "desc" if sort[0] == "-" else "asc", - } - ) - print(sort_param) - base_args["sortby"] = sort_param - - if filter: - if filter_lang == "cql2-json": - base_args["filter-lang"] = "cql2-json" - base_args["filter"] = orjson.loads(unquote_plus(filter)) - else: - base_args["filter-lang"] = "cql2-json" - base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter))) - - if fields: - includes = set() - excludes = set() - for field in fields: - if field[0] == "-": - excludes.add(field[1:]) - elif field[0] == "+": - includes.add(field[1:]) - else: - includes.add(field) - base_args["fields"] = {"include": includes, "exclude": excludes} - - # Do the request - try: - search_request = self.post_request_model(**base_args) - except ValidationError: - raise HTTPException(status_code=400, detail="Invalid parameters provided") - resp = await self.post_search(search_request=search_request, request=request) - - return resp - - async def post_search( - self, search_request: BaseSearchPostRequest, request: Request - ) -> ItemCollection: - """ - Perform a POST search on the catalog. - - Args: - search_request (BaseSearchPostRequest): Request object that includes the parameters for the search. - kwargs: Keyword arguments passed to the function. - - Returns: - ItemCollection: A collection of items matching the search criteria. - - Raises: - HTTPException: If there is an error with the cql2_json filter. 
- """ - base_url = str(request.base_url) - - search = self.database.make_search() - - if search_request.ids: - search = self.database.apply_ids_filter( - search=search, item_ids=search_request.ids - ) - - if search_request.collections: - search = self.database.apply_collections_filter( - search=search, collection_ids=search_request.collections - ) - - if search_request.datetime: - datetime_search = self._return_date(search_request.datetime) - search = self.database.apply_datetime_filter( - search=search, datetime_search=datetime_search - ) - - if search_request.bbox: - bbox = search_request.bbox - if len(bbox) == 6: - bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] - - search = self.database.apply_bbox_filter(search=search, bbox=bbox) - - if search_request.intersects: - print("INTERSECTS: HELLO") - print("SEARCH1: ", search) - search = self.database.apply_intersects_filter( - search=search, intersects=search_request.intersects - ) - print("SEARCH2: ", search) - - if search_request.query: - for field_name, expr in search_request.query.items(): - field = "properties__" + field_name - for op, value in expr.items(): - search = self.database.apply_stacql_filter( - search=search, op=op, field=field, value=value - ) - - # only cql2_json is supported here - if hasattr(search_request, "filter"): - cql2_filter = getattr(search_request, "filter", None) - try: - search = self.database.apply_cql2_filter(search, cql2_filter) - except Exception as e: - raise HTTPException( - status_code=400, detail=f"Error with cql2_json filter: {e}" - ) - - sort = None - if search_request.sortby: - sort = self.database.populate_sort(search_request.sortby) - - limit = 10 - if search_request.limit: - limit = search_request.limit - - items, maybe_count, next_token = await self.database.execute_search( - search=search, - limit=limit, - token=search_request.token, # type: ignore - sort=sort, - collection_ids=search_request.collections, - ) - - items = [ - self.item_serializer.db_to_stac(item, base_url=base_url) for item in items - ] - - if self.extension_is_enabled("FieldsExtension"): - if search_request.query is not None: - query_include: Set[str] = set( - [ - k if k in Settings.get().indexed_fields else f"properties.{k}" - for k in search_request.query.keys() - ] - ) - if not search_request.fields.include: - search_request.fields.include = query_include - else: - search_request.fields.include.union(query_include) - - filter_kwargs = search_request.fields.filter_fields - - items = [ - orjson.loads( - stac_pydantic.Item(**feat).json(**filter_kwargs, exclude_unset=True) - ) - for feat in items - ] - - context_obj = None - if self.extension_is_enabled("ContextExtension"): - context_obj = { - "returned": len(items), - "limit": limit, - } - if maybe_count is not None: - context_obj["matched"] = maybe_count - - links = [] - if next_token: - links = await PagingLinks(request=request, next=next_token).get_links() - - return ItemCollection( - type="FeatureCollection", - features=items, - links=links, - context=context_obj, - ) - - -@attr.s -class TransactionsClient(AsyncBaseTransactionsClient): - """Transactions extension specific CRUD operations.""" - - database: BaseDatabaseLogic = attr.ib() - settings: ApiBaseSettings = attr.ib() - session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) - - @overrides - async def create_item( - self, collection_id: str, item: stac_types.Item, **kwargs - ) -> Optional[stac_types.Item]: - """Create an item in the collection. 
- - Args: - collection_id (str): The id of the collection to add the item to. - item (stac_types.Item): The item to be added to the collection. - kwargs: Additional keyword arguments. - - Returns: - stac_types.Item: The created item. - - Raises: - NotFound: If the specified collection is not found in the database. - ConflictError: If the item in the specified collection already exists. - - """ - base_url = str(kwargs["request"].base_url) - - # If a feature collection is posted - if item["type"] == "FeatureCollection": - bulk_client = BulkTransactionsClient( - database=self.database, settings=self.settings - ) - processed_items = [ - bulk_client.preprocess_item(item, base_url, BulkTransactionMethod.INSERT) for item in item["features"] # type: ignore - ] - - await self.database.bulk_async( - collection_id, processed_items, refresh=kwargs.get("refresh", False) - ) - - return None - else: - item = await self.database.prep_create_item(item=item, base_url=base_url) - await self.database.create_item(item, refresh=kwargs.get("refresh", False)) - return item - - @overrides - async def update_item( - self, collection_id: str, item_id: str, item: stac_types.Item, **kwargs - ) -> stac_types.Item: - """Update an item in the collection. - - Args: - collection_id (str): The ID of the collection the item belongs to. - item_id (str): The ID of the item to be updated. - item (stac_types.Item): The new item data. - kwargs: Other optional arguments, including the request object. - - Returns: - stac_types.Item: The updated item object. - - Raises: - NotFound: If the specified collection is not found in the database. - - """ - base_url = str(kwargs["request"].base_url) - now = datetime_type.now(timezone.utc).isoformat().replace("+00:00", "Z") - item["properties"]["updated"] = now - - await self.database.check_collection_exists(collection_id) - await self.delete_item(item_id=item_id, collection_id=collection_id) - await self.create_item(collection_id=collection_id, item=item, **kwargs) - - return ItemSerializer.db_to_stac(item, base_url) - - @overrides - async def delete_item( - self, item_id: str, collection_id: str, **kwargs - ) -> Optional[stac_types.Item]: - """Delete an item from a collection. - - Args: - item_id (str): The identifier of the item to delete. - collection_id (str): The identifier of the collection that contains the item. - - Returns: - Optional[stac_types.Item]: The deleted item, or `None` if the item was successfully deleted. - """ - await self.database.delete_item(item_id=item_id, collection_id=collection_id) - return None - - @overrides - async def create_collection( - self, collection: stac_types.Collection, **kwargs - ) -> stac_types.Collection: - """Create a new collection in the database. - - Args: - collection (stac_types.Collection): The collection to be created. - kwargs: Additional keyword arguments. - - Returns: - stac_types.Collection: The created collection object. - - Raises: - ConflictError: If the collection already exists. - """ - base_url = str(kwargs["request"].base_url) - collection_links = CollectionLinks( - collection_id=collection["id"], base_url=base_url - ).create_links() - collection["links"] = collection_links - await self.database.create_collection(collection=collection) - - return CollectionSerializer.db_to_stac(collection, base_url) - - @overrides - async def update_collection( - self, collection: stac_types.Collection, **kwargs - ) -> stac_types.Collection: - """ - Update a collection. 
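A small note on the `updated` stamp written by `update_item` above: it is plain `isoformat()` output with the `+00:00` UTC offset rewritten to the more conventional `Z` suffix.

```python
from datetime import datetime, timezone

# e.g. "2024-02-13T03:52:47.123456Z"
now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
```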
- - This method updates an existing collection in the database by first finding - the collection by the id given in the keyword argument `collection_id`. - If no `collection_id` is given, the id of the given collection object is used. - If the object and keyword collection ids differ, the collection id of the - collection's items is updated as well; otherwise the items are left unchanged. - The updated collection is then returned. - - Args: - collection: A STAC collection that needs to be updated. - kwargs: Additional keyword arguments. - - Returns: - A STAC collection that has been updated in the database. - - """ - base_url = str(kwargs["request"].base_url) - - collection_id = kwargs["request"].query_params.get( - "collection_id", collection["id"] - ) - - collection_links = CollectionLinks( - collection_id=collection["id"], base_url=base_url - ).create_links() - collection["links"] = collection_links - - await self.database.update_collection( - collection_id=collection_id, collection=collection - ) - - return CollectionSerializer.db_to_stac(collection, base_url) - - @overrides - async def delete_collection( - self, collection_id: str, **kwargs - ) -> Optional[stac_types.Collection]: - """ - Delete a collection. - - This method deletes an existing collection in the database. - - Args: - collection_id (str): The identifier of the collection to delete. - kwargs: Additional keyword arguments. - - Returns: - None. - - Raises: - NotFoundError: If the collection doesn't exist. - """ - await self.database.delete_collection(collection_id=collection_id) - return None - - -@attr.s -class BulkTransactionsClient(BaseBulkTransactionsClient): - """A client for posting bulk transactions to the backend database. - - Attributes: - session: An instance of `Session` to use for database connection. - database: An instance of `DatabaseLogic` to perform database operations. - """ - - database: BaseDatabaseLogic = attr.ib() - settings: ApiBaseSettings = attr.ib() - session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) - - def __attrs_post_init__(self): - """Create es engine.""" - self.client = self.settings.create_client - - def preprocess_item( - self, item: stac_types.Item, base_url, method: BulkTransactionMethod - ) -> stac_types.Item: - """Preprocess an item to match the data model. - - Args: - item: The item to preprocess. - base_url: The base URL of the request. - method: The bulk transaction method. - - Returns: - The preprocessed item. - """ - exist_ok = method == BulkTransactionMethod.UPSERT - return self.database.sync_prep_create_item( - item=item, base_url=base_url, exist_ok=exist_ok - ) - - @overrides - def bulk_item_insert( - self, items: Items, chunk_size: Optional[int] = None, **kwargs - ) -> str: - """Perform a bulk insertion of items into the database using Elasticsearch. - - Args: - items: The items to insert. - chunk_size: The size of each chunk for bulk processing. - **kwargs: Additional keyword arguments, such as `request` and `refresh`. - - Returns: - A string indicating the number of items successfully added.
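One caveat worth flagging for `bulk_item_insert`, whose body follows: it takes the collection id from the first processed item, so a single call must stay within one collection. A hedged sketch of pre-grouping a mixed batch (the helper is illustrative, not part of the patch):

```python
from collections import defaultdict
from typing import Any, Dict, List


def group_by_collection(items: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Bucket raw item dicts by their collection id."""
    grouped: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for item in items:
        grouped[item["collection"]].append(item)
    return dict(grouped)


batch = [
    {"id": "a", "collection": "sentinel-2"},
    {"id": "b", "collection": "landsat-8"},
]
# One bulk call per bucket keeps the single-collection assumption intact.
assert sorted(group_by_collection(batch)) == ["landsat-8", "sentinel-2"]
```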
- """ - request = kwargs.get("request") - if request: - base_url = str(request.base_url) - else: - base_url = "" - - processed_items = [ - self.preprocess_item(item, base_url, items.method) - for item in items.items.values() - ] - - # not a great way to get the collection_id-- should be part of the method signature - collection_id = processed_items[0]["collection"] - - self.database.bulk_sync( - collection_id, processed_items, refresh=kwargs.get("refresh", False) - ) - - return f"Successfully added {len(processed_items)} Items." - - -@attr.s -class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): - """Defines a pattern for implementing the STAC filter extension.""" - - # todo: use the ES _mapping endpoint to dynamically find what fields exist - async def get_queryables( - self, collection_id: Optional[str] = None, **kwargs - ) -> Dict[str, Any]: - """Get the queryables available for the given collection_id. - - If collection_id is None, returns the intersection of all - queryables over all collections. - - This base implementation returns a blank queryable schema. This is not allowed - under OGC CQL but it is allowed by the STAC API Filter Extension - - https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables - - Args: - collection_id (str, optional): The id of the collection to get queryables for. - **kwargs: additional keyword arguments - - Returns: - Dict[str, Any]: A dictionary containing the queryables for the given collection. - """ - return { - "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://stac-api.example.com/queryables", - "type": "object", - "title": "Queryables for Example STAC API", - "description": "Queryable names for the example STAC API Item Search filter.", - "properties": { - "id": { - "description": "ID", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", - }, - "collection": { - "description": "Collection", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", - }, - "geometry": { - "description": "Geometry", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", - }, - "datetime": { - "description": "Acquisition Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", - }, - "created": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", - }, - "updated": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", - }, - "cloud_cover": { - "description": "Cloud Cover", - "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", - }, - "cloud_shadow_percentage": { - "description": "Cloud Shadow Percentage", - "title": "Cloud Shadow Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - "nodata_pixel_percentage": { - "description": "No Data Pixel Percentage", - "title": "No Data Pixel Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - }, - "additionalProperties": True, - } diff --git a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py deleted file mode 100644 index 2b7a3017..00000000 --- 
a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py +++ /dev/null @@ -1,14 +0,0 @@ -"""A few datetime methods.""" -from datetime import datetime, timezone - -from pystac.utils import datetime_to_str - - -def now_in_utc() -> datetime: - """Return a datetime value of now with the UTC timezone applied.""" - return datetime.now(timezone.utc) - - -def now_to_rfc3339_str() -> str: - """Return an RFC 3339 string representing now.""" - return datetime_to_str(now_in_utc()) diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py b/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py deleted file mode 100644 index 7ee6eea5..00000000 --- a/stac_fastapi/core/stac_fastapi/core/extensions/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""elasticsearch extensions modifications.""" - -from .query import Operator, QueryableTypes, QueryExtension - -__all__ = ["Operator", "QueryableTypes", "QueryExtension"] diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py deleted file mode 100644 index fe691ddf..00000000 --- a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py +++ /dev/null @@ -1,267 +0,0 @@ -""" -Implements Filter Extension. - -Basic CQL2 (AND, OR, NOT), comparison operators (=, <>, <, <=, >, >=), and IS NULL. -The comparison operators are allowed against string, numeric, boolean, date, and datetime types. - -Advanced comparison operators (http://www.opengis.net/spec/cql2/1.0/req/advanced-comparison-operators) -defines the LIKE, IN, and BETWEEN operators. - -Basic Spatial Operators (http://www.opengis.net/spec/cql2/1.0/conf/basic-spatial-operators) -defines the intersects operator (S_INTERSECTS). -""" -from __future__ import annotations - -import datetime -import re -from enum import Enum -from typing import List, Union - -from geojson_pydantic import ( - GeometryCollection, - LineString, - MultiLineString, - MultiPoint, - MultiPolygon, - Point, - Polygon, -) -from pydantic import BaseModel - -queryables_mapping = { - "id": "id", - "collection": "collection", - "geometry": "geometry", - "datetime": "properties.datetime", - "created": "properties.created", - "updated": "properties.updated", - "cloud_cover": "properties.eo:cloud_cover", - "cloud_shadow_percentage": "properties.s2:cloud_shadow_percentage", - "nodata_pixel_percentage": "properties.s2:nodata_pixel_percentage", -} - - -class LogicalOp(str, Enum): - """Logical operator. - - CQL2 logical operators and, or, and not. - """ - - _and = "and" - _or = "or" - _not = "not" - - -class ComparisonOp(str, Enum): - """Comparison operator. - - CQL2 comparison operators =, <>, <, <=, >, >=, and isNull. - """ - - eq = "=" - neq = "<>" - lt = "<" - lte = "<=" - gt = ">" - gte = ">=" - is_null = "isNull" - - def to_es(self): - """Generate an Elasticsearch term operator.""" - if self == ComparisonOp.lt: - return "lt" - elif self == ComparisonOp.lte: - return "lte" - elif self == ComparisonOp.gt: - return "gt" - elif self == ComparisonOp.gte: - return "gte" - else: - raise RuntimeError( - f"Comparison op {self.value} does not have an Elasticsearch term operator equivalent." - ) - - -class AdvancedComparisonOp(str, Enum): - """Advanced Comparison operator. - - CQL2 advanced comparison operators like (~), between, and in. 
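Shown standalone, the `ComparisonOp.to_es` mapping above: only the four range operators translate to Elasticsearch range keywords, while `=`, `<>`, and `isNull` are built as term/bool clauses elsewhere in `Clause.to_es`.

```python
ES_RANGE_OPS = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}


def comparison_to_es_range(op: str, field: str, value) -> dict:
    """Build the range clause that Clause.to_es emits for <, <=, >, >=."""
    if op not in ES_RANGE_OPS:
        raise RuntimeError(f"Comparison op {op} has no Elasticsearch range equivalent.")
    return {"range": {field: {ES_RANGE_OPS[op]: value}}}


assert comparison_to_es_range("<=", "properties.eo:cloud_cover", 20) == {
    "range": {"properties.eo:cloud_cover": {"lte": 20}}
}
```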
- """ - - like = "like" - between = "between" - _in = "in" - - -class SpatialIntersectsOp(str, Enum): - """Spatial intersections operator s_intersects.""" - - s_intersects = "s_intersects" - - -class PropertyReference(BaseModel): - """Property reference.""" - - property: str - - def to_es(self): - """Produce a term value for this, possibly mapped by a queryable.""" - return queryables_mapping.get(self.property, self.property) - - -class Timestamp(BaseModel): - """Representation of an RFC 3339 datetime value object.""" - - timestamp: datetime.datetime - - def to_es(self): - """Produce an RFC 3339 datetime string.""" - return self.timestamp.isoformat() - - -class Date(BaseModel): - """Representation of an ISO 8601 date value object.""" - - date: datetime.date - - def to_es(self): - """Produce an ISO 8601 date string.""" - return self.date.isoformat() - - -class FloatInt(float): - """Representation of Float/Int.""" - - @classmethod - def __get_validators__(cls): - """Return validator to use.""" - yield cls.validate - - @classmethod - def validate(cls, v): - """Validate input value.""" - if isinstance(v, float): - return v - else: - return int(v) - - -Arg = Union[ - "Clause", - PropertyReference, - Timestamp, - Date, - Point, - MultiPoint, - LineString, - MultiLineString, - Polygon, - MultiPolygon, - GeometryCollection, - FloatInt, - str, - bool, -] - - -class Clause(BaseModel): - """Filter extension clause.""" - - op: Union[LogicalOp, ComparisonOp, AdvancedComparisonOp, SpatialIntersectsOp] - args: List[Union[Arg, List[Arg]]] - - def to_es(self): - """Generate an Elasticsearch expression for this Clause.""" - if self.op == LogicalOp._and: - return {"bool": {"filter": [to_es(arg) for arg in self.args]}} - elif self.op == LogicalOp._or: - return {"bool": {"should": [to_es(arg) for arg in self.args]}} - elif self.op == LogicalOp._not: - return {"bool": {"must_not": [to_es(arg) for arg in self.args]}} - elif self.op == ComparisonOp.eq: - return {"term": {to_es(self.args[0]): to_es(self.args[1])}} - elif self.op == ComparisonOp.neq: - return { - "bool": { - "must_not": [{"term": {to_es(self.args[0]): to_es(self.args[1])}}] - } - } - elif self.op == AdvancedComparisonOp.like: - return { - "wildcard": { - to_es(self.args[0]): { - "value": cql2_like_to_es(str(to_es(self.args[1]))), - "case_insensitive": "false", - } - } - } - elif self.op == AdvancedComparisonOp.between: - return { - "range": { - to_es(self.args[0]): { - "gte": to_es(self.args[1]), - "lte": to_es(self.args[2]), - } - } - } - elif self.op == AdvancedComparisonOp._in: - if not isinstance(self.args[1], List): - raise RuntimeError(f"Arg {self.args[1]} is not a list") - return { - "terms": {to_es(self.args[0]): [to_es(arg) for arg in self.args[1]]} - } - elif ( - self.op == ComparisonOp.lt - or self.op == ComparisonOp.lte - or self.op == ComparisonOp.gt - or self.op == ComparisonOp.gte - ): - return { - "range": {to_es(self.args[0]): {to_es(self.op): to_es(self.args[1])}} - } - elif self.op == ComparisonOp.is_null: - return {"bool": {"must_not": {"exists": {"field": to_es(self.args[0])}}}} - elif self.op == SpatialIntersectsOp.s_intersects: - return { - "geo_shape": { - to_es(self.args[0]): { - "shape": to_es(self.args[1]), - "relation": "intersects", - } - } - } - - -def to_es(arg: Arg): - """Generate an Elasticsearch expression for this Arg.""" - if (to_es_method := getattr(arg, "to_es", None)) and callable(to_es_method): - return to_es_method() - elif gi := getattr(arg, "__geo_interface__", None): - return gi - elif isinstance(arg, 
GeometryCollection): - return arg.dict() - elif ( - isinstance(arg, int) - or isinstance(arg, float) - or isinstance(arg, str) - or isinstance(arg, bool) - ): - return arg - else: - raise RuntimeError(f"unknown arg {repr(arg)}") - - -def cql2_like_to_es(string): - """Convert wildcard characters in CQL2 ('_' and '%') to Elasticsearch wildcard characters ('?' and '*', respectively). Handle escape characters and pass through Elasticsearch wildcards.""" - percent_pattern = r"(? Callable[[Any, Any], bool]: - """Return python operator.""" - return getattr(operator, self._value_) - - -class Queryables(str, AutoValueEnum): - """Queryable fields.""" - - ... - - -@dataclass -class QueryableTypes: - """Defines a set of queryable fields.""" - - ... - - -class QueryExtensionPostRequest(BaseModel): - """Queryable validation. - - Add queryables validation to the POST request - to raise errors for unsupported queries. - """ - - query: Optional[Dict[Queryables, Dict[Operator, Any]]] - - @root_validator(pre=True) - def validate_query_fields(cls, values: Dict) -> Dict: - """Validate query fields.""" - ... - - -class QueryExtension(QueryExtensionBase): - """Query Extension. - - Override the POST request model to add validation against - supported fields - """ - - ... diff --git a/stac_fastapi/core/stac_fastapi/core/models/__init__.py b/stac_fastapi/core/stac_fastapi/core/models/__init__.py deleted file mode 100644 index d0748bcc..00000000 --- a/stac_fastapi/core/stac_fastapi/core/models/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""stac_fastapi.core.models module.""" diff --git a/stac_fastapi/core/stac_fastapi/core/models/links.py b/stac_fastapi/core/stac_fastapi/core/models/links.py deleted file mode 100644 index 3941a149..00000000 --- a/stac_fastapi/core/stac_fastapi/core/models/links.py +++ /dev/null @@ -1,138 +0,0 @@ -"""link helpers.""" - -from typing import Any, Dict, List, Optional -from urllib.parse import ParseResult, parse_qs, unquote, urlencode, urljoin, urlparse - -import attr -from stac_pydantic.links import Relations -from stac_pydantic.shared import MimeTypes -from starlette.requests import Request - -# Copied from pgstac links - -# These can be inferred from the item/collection, so they aren't included in the database -# Instead they are dynamically generated when querying the database using the classes defined below -INFERRED_LINK_RELS = ["self", "item", "parent", "collection", "root"] - - -def merge_params(url: str, newparams: Dict) -> str: - """Merge url parameters.""" - u = urlparse(url) - params = parse_qs(u.query) - params.update(newparams) - param_string = unquote(urlencode(params, True)) - - href = ParseResult( - scheme=u.scheme, - netloc=u.netloc, - path=u.path, - params=u.params, - query=param_string, - fragment=u.fragment, - ).geturl() - return href - - -@attr.s -class BaseLinks: - """Create inferred links common to collections and items.""" - - request: Request = attr.ib() - - @property - def base_url(self): - """Get the base url.""" - return str(self.request.base_url) - - @property - def url(self): - """Get the current request url.""" - return str(self.request.url) - - def resolve(self, url): - """Resolve url to the current request url.""" - return urljoin(str(self.base_url), str(url)) - - def link_self(self) -> Dict: - """Return the self link.""" - return dict(rel=Relations.self.value, type=MimeTypes.json.value, href=self.url) - - def link_root(self) -> Dict: - """Return the catalog root.""" - return dict( - rel=Relations.root.value, type=MimeTypes.json.value,
href=self.base_url - ) - - def create_links(self) -> List[Dict[str, Any]]: - """Return all inferred links.""" - links = [] - for name in dir(self): - if name.startswith("link_") and callable(getattr(self, name)): - link = getattr(self, name)() - if link is not None: - links.append(link) - return links - - async def get_links( - self, extra_links: Optional[List[Dict[str, Any]]] = None - ) -> List[Dict[str, Any]]: - """ - Generate all the links. - - Get the links object for a stac resource by iterating through - available methods on this class that start with link_. - """ - # TODO: Pass request.json() into function so this doesn't need to be coroutine - if self.request.method == "POST": - self.request.postbody = await self.request.json() - # join passed in links with generated links - # and update relative paths - links = self.create_links() - - if extra_links: - # For extra links passed in, - # add links modified with a resolved href. - # Drop any links that are dynamically - # determined by the server (e.g. self, parent, etc.) - # Resolving the href allows for relative paths - # to be stored in pgstac and for the hrefs in the - # links of response STAC objects to be resolved - # to the request url. - links += [ - {**link, "href": self.resolve(link["href"])} - for link in extra_links - if link["rel"] not in INFERRED_LINK_RELS - ] - - return links - - -@attr.s -class PagingLinks(BaseLinks): - """Create links for paging.""" - - next: Optional[str] = attr.ib(kw_only=True, default=None) - - def link_next(self) -> Optional[Dict[str, Any]]: - """Create link for next page.""" - if self.next is not None: - method = self.request.method - if method == "GET": - href = merge_params(self.url, {"token": self.next}) - link = dict( - rel=Relations.next.value, - type=MimeTypes.json.value, - method=method, - href=href, - ) - return link - if method == "POST": - return { - "rel": Relations.next, - "type": MimeTypes.json, - "method": method, - "href": f"{self.request.url}", - "body": {**self.request.postbody, "token": self.next}, - } - - return None diff --git a/stac_fastapi/core/stac_fastapi/core/models/search.py b/stac_fastapi/core/stac_fastapi/core/models/search.py deleted file mode 100644 index 33b73b68..00000000 --- a/stac_fastapi/core/stac_fastapi/core/models/search.py +++ /dev/null @@ -1 +0,0 @@ -"""Unused search model.""" diff --git a/stac_fastapi/core/stac_fastapi/core/serializers.py b/stac_fastapi/core/stac_fastapi/core/serializers.py deleted file mode 100644 index 8e83ef7c..00000000 --- a/stac_fastapi/core/stac_fastapi/core/serializers.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Serializers.""" -import abc -from copy import deepcopy -from typing import Any - -import attr - -from stac_fastapi.core.datetime_utils import now_to_rfc3339_str -from stac_fastapi.types import stac as stac_types -from stac_fastapi.types.links import CollectionLinks, ItemLinks, resolve_links - - -@attr.s -class Serializer(abc.ABC): - """Defines serialization methods between the API and the data model. - - This class is meant to be subclassed and implemented by specific serializers for different STAC objects (e.g. Item, Collection). - """ - - @classmethod - @abc.abstractmethod - def db_to_stac(cls, item: dict, base_url: str) -> Any: - """Transform database model to STAC object. - - Arguments: - item (dict): A dictionary representing the database model. - base_url (str): The base URL of the STAC API. - - Returns: - Any: A STAC object, e.g. an `Item` or `Collection`, representing the input `item`. - """ - ... 
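Returning to the links helpers above: `merge_params` is how `PagingLinks.link_next` mints a GET next link, i.e. the current URL with the pagination token merged into its existing query string. A standalone copy for illustration:

```python
from urllib.parse import ParseResult, parse_qs, unquote, urlencode, urlparse


def merge_params(url: str, newparams: dict) -> str:
    """Merge new query parameters into a URL, as the deleted helper does."""
    u = urlparse(url)
    params = parse_qs(u.query)
    params.update(newparams)
    return ParseResult(
        scheme=u.scheme,
        netloc=u.netloc,
        path=u.path,
        params=u.params,
        query=unquote(urlencode(params, True)),
        fragment=u.fragment,
    ).geturl()


assert (
    merge_params("http://localhost:8080/search?limit=10", {"token": "abc123"})
    == "http://localhost:8080/search?limit=10&token=abc123"
)
```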
- - @classmethod - @abc.abstractmethod - def stac_to_db(cls, stac_object: Any, base_url: str) -> dict: - """Transform STAC object to database model. - - Arguments: - stac_object (Any): A STAC object, e.g. an `Item` or `Collection`. - base_url (str): The base URL of the STAC API. - - Returns: - dict: A dictionary representing the database model. - """ - ... - - -class ItemSerializer(Serializer): - """Serialization methods for STAC items.""" - - @classmethod - def stac_to_db(cls, stac_data: stac_types.Item, base_url: str) -> stac_types.Item: - """Transform STAC item to database-ready STAC item. - - Args: - stac_data (stac_types.Item): The STAC item object to be transformed. - base_url (str): The base URL for the STAC API. - - Returns: - stac_types.Item: The database-ready STAC item object. - """ - item_links = ItemLinks( - collection_id=stac_data["collection"], - item_id=stac_data["id"], - base_url=base_url, - ).create_links() - stac_data["links"] = item_links - - now = now_to_rfc3339_str() - if "created" not in stac_data["properties"]: - stac_data["properties"]["created"] = now - stac_data["properties"]["updated"] = now - return stac_data - - @classmethod - def db_to_stac(cls, item: dict, base_url: str) -> stac_types.Item: - """Transform database-ready STAC item to STAC item. - - Args: - item (dict): The database-ready STAC item to be transformed. - base_url (str): The base URL for the STAC API. - - Returns: - stac_types.Item: The STAC item object. - """ - item_id = item["id"] - collection_id = item["collection"] - item_links = ItemLinks( - collection_id=collection_id, item_id=item_id, base_url=base_url - ).create_links() - - original_links = item.get("links", []) - if original_links: - item_links += resolve_links(original_links, base_url) - - return stac_types.Item( - type="Feature", - stac_version=item.get("stac_version", ""), - stac_extensions=item.get("stac_extensions", []), - id=item_id, - collection=item.get("collection", ""), - geometry=item.get("geometry", {}), - bbox=item.get("bbox", []), - properties=item.get("properties", {}), - links=item_links, - assets=item.get("assets", {}), - ) - - -class CollectionSerializer(Serializer): - """Serialization methods for STAC collections.""" - - @classmethod - def db_to_stac(cls, collection: dict, base_url: str) -> stac_types.Collection: - """Transform database model to STAC collection. - - Args: - collection (dict): The collection data in dictionary form, extracted from the database. - base_url (str): The base URL for the collection. - - Returns: - stac_types.Collection: The STAC collection object. - """ - # Avoid modifying the input dict in-place ... 
doing so breaks some tests - collection = deepcopy(collection) - - # Set defaults - collection_id = collection.get("id") - collection.setdefault("type", "Collection") - collection.setdefault("stac_extensions", []) - collection.setdefault("stac_version", "") - collection.setdefault("title", "") - collection.setdefault("description", "") - collection.setdefault("keywords", []) - collection.setdefault("license", "") - collection.setdefault("providers", []) - collection.setdefault("summaries", {}) - collection.setdefault( - "extent", {"spatial": {"bbox": []}, "temporal": {"interval": []}} - ) - collection.setdefault("assets", {}) - - # Create the collection links using CollectionLinks - collection_links = CollectionLinks( - collection_id=collection_id, base_url=base_url - ).create_links() - - # Add any additional links from the collection dictionary - original_links = collection.get("links") - if original_links: - collection_links += resolve_links(original_links, base_url) - collection["links"] = collection_links - - # Return the stac_types.Collection object - return stac_types.Collection(**collection) diff --git a/stac_fastapi/core/stac_fastapi/core/session.py b/stac_fastapi/core/stac_fastapi/core/session.py deleted file mode 100644 index d5a7aa3c..00000000 --- a/stac_fastapi/core/stac_fastapi/core/session.py +++ /dev/null @@ -1,25 +0,0 @@ -"""database session management.""" -import logging - -import attr - -logger = logging.getLogger(__name__) - - -@attr.s -class Session: - """Database session management.""" - - @classmethod - def create_from_env(cls): - """Create from environment.""" - ... - - @classmethod - def create_from_settings(cls, settings): - """Create a Session object from settings.""" - ... - - def __attrs_post_init__(self): - """Post init handler.""" - ... diff --git a/stac_fastapi/core/stac_fastapi/core/types/core.py b/stac_fastapi/core/stac_fastapi/core/types/core.py deleted file mode 100644 index 1212619c..00000000 --- a/stac_fastapi/core/stac_fastapi/core/types/core.py +++ /dev/null @@ -1,306 +0,0 @@ -"""Base clients. Taken from stac-fastapi.types.core v2.4.9.""" -import abc -from datetime import datetime -from typing import Any, Dict, List, Optional, Union - -import attr -from starlette.responses import Response - -from stac_fastapi.core.base_database_logic import BaseDatabaseLogic -from stac_fastapi.types import stac as stac_types -from stac_fastapi.types.conformance import BASE_CONFORMANCE_CLASSES -from stac_fastapi.types.extension import ApiExtension -from stac_fastapi.types.search import BaseSearchPostRequest -from stac_fastapi.types.stac import Conformance - -NumType = Union[float, int] -StacType = Dict[str, Any] - - -@attr.s -class AsyncBaseTransactionsClient(abc.ABC): - """Defines a pattern for implementing the STAC transaction extension.""" - - database = attr.ib(default=BaseDatabaseLogic) - - @abc.abstractmethod - async def create_item( - self, - collection_id: str, - item: Union[stac_types.Item, stac_types.ItemCollection], - **kwargs, - ) -> Optional[Union[stac_types.Item, Response, None]]: - """Create a new item. - - Called with `POST /collections/{collection_id}/items`. - - Args: - item: the item or item collection - collection_id: the id of the collection from the resource path - - Returns: - The item that was created or None if item collection. - """ - ...
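For orientation, a minimal, hypothetical implementation of the `create_item`/`delete_item` contract defined by the abstract client above, against an in-memory store; `InMemoryTransactions` and its `ValueError` are stand-ins for a real backend and the API's `ConflictError`.

```python
import asyncio
from typing import Any, Dict


class InMemoryTransactions:
    """Toy transactions backend keyed as {collection_id: {item_id: item}}."""

    def __init__(self) -> None:
        self._items: Dict[str, Dict[str, Dict[str, Any]]] = {}

    async def create_item(self, collection_id: str, item: Dict[str, Any]) -> Dict[str, Any]:
        collection = self._items.setdefault(collection_id, {})
        if item["id"] in collection:  # ConflictError in the real API
            raise ValueError(f"Item {item['id']} already exists")
        collection[item["id"]] = item
        return item

    async def delete_item(self, item_id: str, collection_id: str) -> None:
        self._items.get(collection_id, {}).pop(item_id, None)


client = InMemoryTransactions()
asyncio.run(client.create_item("sentinel-2", {"id": "scene-1"}))
```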
- - @abc.abstractmethod - async def update_item( - self, collection_id: str, item_id: str, item: stac_types.Item, **kwargs - ) -> Optional[Union[stac_types.Item, Response]]: - """Perform a complete update on an existing item. - - Called with `PUT /collections/{collection_id}/items`. It is expected - that this item already exists. The update should do a diff against the - saved item and perform any necessary updates. Partial updates are not - supported by the transactions extension. - - Args: - item: the item (must be complete) - - Returns: - The updated item. - """ - ... - - @abc.abstractmethod - async def delete_item( - self, item_id: str, collection_id: str, **kwargs - ) -> Optional[Union[stac_types.Item, Response]]: - """Delete an item from a collection. - - Called with `DELETE /collections/{collection_id}/items/{item_id}` - - Args: - item_id: id of the item. - collection_id: id of the collection. - - Returns: - The deleted item. - """ - ... - - @abc.abstractmethod - async def create_collection( - self, collection: stac_types.Collection, **kwargs - ) -> Optional[Union[stac_types.Collection, Response]]: - """Create a new collection. - - Called with `POST /collections`. - - Args: - collection: the collection - - Returns: - The collection that was created. - """ - ... - - @abc.abstractmethod - async def update_collection( - self, collection: stac_types.Collection, **kwargs - ) -> Optional[Union[stac_types.Collection, Response]]: - """Perform a complete update on an existing collection. - - Called with `PUT /collections`. It is expected that this item already - exists. The update should do a diff against the saved collection and - perform any necessary updates. Partial updates are not supported by the - transactions extension. - - Args: - collection: the collection (must be complete) - - Returns: - The updated collection. - """ - ... - - @abc.abstractmethod - async def delete_collection( - self, collection_id: str, **kwargs - ) -> Optional[Union[stac_types.Collection, Response]]: - """Delete a collection. - - Called with `DELETE /collections/{collection_id}` - - Args: - collection_id: id of the collection. - - Returns: - The deleted collection. - """ - ... - - -@attr.s # type:ignore -class AsyncBaseCoreClient(abc.ABC): - """Defines a pattern for implementing STAC api core endpoints. - - Attributes: - extensions: list of registered api extensions. - """ - - database = attr.ib(default=BaseDatabaseLogic) - - base_conformance_classes: List[str] = attr.ib( - factory=lambda: BASE_CONFORMANCE_CLASSES - ) - extensions: List[ApiExtension] = attr.ib(default=attr.Factory(list)) - post_request_model = attr.ib(default=BaseSearchPostRequest) - - def conformance_classes(self) -> List[str]: - """Generate conformance classes.""" - conformance_classes = self.base_conformance_classes.copy() - - for extension in self.extensions: - extension_classes = getattr(extension, "conformance_classes", []) - conformance_classes.extend(extension_classes) - - return list(set(conformance_classes)) - - def extension_is_enabled(self, extension: str) -> bool: - """Check if an api extension is enabled.""" - return any([type(ext).__name__ == extension for ext in self.extensions]) - - async def conformance(self, **kwargs) -> stac_types.Conformance: - """Conformance classes. - - Called with `GET /conformance`. - - Returns: - Conformance classes which the server conforms to. 
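`extension_is_enabled` above matches on the extension's class name, so the check reduces to a string comparison; a toy demonstration (`FieldsExtension` here is a stand-in class, not the real import):

```python
class FieldsExtension:  # stand-in for the real extension class
    pass


extensions = [FieldsExtension()]


def extension_is_enabled(name: str) -> bool:
    """Mirror of the class-name check used by AsyncBaseCoreClient."""
    return any(type(ext).__name__ == name for ext in extensions)


assert extension_is_enabled("FieldsExtension")
assert not extension_is_enabled("SortExtension")
```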
- """ - return Conformance(conformsTo=self.conformance_classes()) - - @abc.abstractmethod - async def post_search( - self, search_request: BaseSearchPostRequest, **kwargs - ) -> stac_types.ItemCollection: - """Cross catalog search (POST). - - Called with `POST /search`. - - Args: - search_request: search request parameters. - - Returns: - ItemCollection containing items which match the search criteria. - """ - ... - - @abc.abstractmethod - async def get_search( - self, - collections: Optional[List[str]] = None, - ids: Optional[List[str]] = None, - bbox: Optional[List[NumType]] = None, - datetime: Optional[Union[str, datetime]] = None, - limit: Optional[int] = 10, - query: Optional[str] = None, - token: Optional[str] = None, - fields: Optional[List[str]] = None, - sortby: Optional[str] = None, - intersects: Optional[str] = None, - **kwargs, - ) -> stac_types.ItemCollection: - """Cross catalog search (GET). - - Called with `GET /search`. - - Returns: - ItemCollection containing items which match the search criteria. - """ - ... - - @abc.abstractmethod - async def get_item( - self, item_id: str, collection_id: str, **kwargs - ) -> stac_types.Item: - """Get item by id. - - Called with `GET /collections/{collection_id}/items/{item_id}`. - - Args: - item_id: Id of the item. - collection_id: Id of the collection. - - Returns: - Item. - """ - ... - - @abc.abstractmethod - async def all_collections(self, **kwargs) -> stac_types.Collections: - """Get all available collections. - - Called with `GET /collections`. - - Returns: - A list of collections. - """ - ... - - @abc.abstractmethod - async def get_collection( - self, collection_id: str, **kwargs - ) -> stac_types.Collection: - """Get collection by id. - - Called with `GET /collections/{collection_id}`. - - Args: - collection_id: Id of the collection. - - Returns: - Collection. - """ - ... - - @abc.abstractmethod - async def item_collection( - self, - collection_id: str, - bbox: Optional[List[NumType]] = None, - datetime: Optional[Union[str, datetime]] = None, - limit: int = 10, - token: str = None, - **kwargs, - ) -> stac_types.ItemCollection: - """Get all items from a specific collection. - - Called with `GET /collections/{collection_id}/items` - - Args: - collection_id: id of the collection. - limit: number of items to return. - token: pagination token. - - Returns: - An ItemCollection. - """ - ... - - -@attr.s -class AsyncBaseFiltersClient(abc.ABC): - """Defines a pattern for implementing the STAC filter extension.""" - - async def get_queryables( - self, collection_id: Optional[str] = None, **kwargs - ) -> Dict[str, Any]: - """Get the queryables available for the given collection_id. - - If collection_id is None, returns the intersection of all queryables over all - collections. - - This base implementation returns a blank queryable schema. 
This is not allowed - under OGC CQL but it is allowed by the STAC API Filter Extension - https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables - """ - return { - "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://example.org/queryables", - "type": "object", - "title": "Queryables for Example STAC API", - "description": "Queryable names for the example STAC API Item Search filter.", - "properties": {}, - } diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py b/stac_fastapi/core/stac_fastapi/core/utilities.py deleted file mode 100644 index b5dac390..00000000 --- a/stac_fastapi/core/stac_fastapi/core/utilities.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Module for geospatial processing functions. - -This module contains functions for transforming geospatial coordinates, -such as converting bounding boxes to polygon representations. -""" -from typing import List - - -def bbox2polygon(b0: float, b1: float, b2: float, b3: float) -> List[List[List[float]]]: - """Transform a bounding box represented by its four coordinates `b0`, `b1`, `b2`, and `b3` into a polygon. - - Args: - b0 (float): The x-coordinate of the lower-left corner of the bounding box. - b1 (float): The y-coordinate of the lower-left corner of the bounding box. - b2 (float): The x-coordinate of the upper-right corner of the bounding box. - b3 (float): The y-coordinate of the upper-right corner of the bounding box. - - Returns: - List[List[List[float]]]: A polygon represented as a list of lists of coordinates. - """ - return [[[b0, b1], [b2, b1], [b2, b3], [b0, b3], [b0, b1]]] diff --git a/stac_fastapi/core/stac_fastapi/core/version.py b/stac_fastapi/core/stac_fastapi/core/version.py deleted file mode 100644 index 6b648e2b..00000000 --- a/stac_fastapi/core/stac_fastapi/core/version.py +++ /dev/null @@ -1,2 +0,0 @@ -"""library version.""" -__version__ = "2.0.0" diff --git a/stac_fastapi/elasticsearch/README.md b/stac_fastapi/elasticsearch/README.md deleted file mode 100644 index becdb4d7..00000000 --- a/stac_fastapi/elasticsearch/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Requirements - -The Elasticsearch backend requires **elasticsearch**. 
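`bbox2polygon` above returns a single closed ring, counter-clockwise, with the first vertex repeated at the end; concretely (function body copied for illustration):

```python
def bbox2polygon(b0: float, b1: float, b2: float, b3: float) -> list:
    """Lower-left (b0, b1) and upper-right (b2, b3) corners to a closed ring."""
    return [[[b0, b1], [b2, b1], [b2, b3], [b0, b3], [b0, b1]]]


assert bbox2polygon(-105.0, 35.0, -100.0, 40.0) == [
    [[-105.0, 35.0], [-100.0, 35.0], [-100.0, 40.0], [-105.0, 40.0], [-105.0, 35.0]]
]
```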
diff --git a/stac_fastapi/elasticsearch/pytest.ini b/stac_fastapi/elasticsearch/pytest.ini deleted file mode 100644 index db0353ef..00000000 --- a/stac_fastapi/elasticsearch/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -testpaths = tests -addopts = -sv -asyncio_mode = auto \ No newline at end of file diff --git a/stac_fastapi/elasticsearch/setup.cfg b/stac_fastapi/elasticsearch/setup.cfg deleted file mode 100644 index 7a42432c..00000000 --- a/stac_fastapi/elasticsearch/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -version = attr: stac_fastapi.elasticsearch.version.__version__ diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py deleted file mode 100644 index 278cc356..00000000 --- a/stac_fastapi/elasticsearch/setup.py +++ /dev/null @@ -1,58 +0,0 @@ -"""stac_fastapi: elasticsearch module.""" - -from setuptools import find_namespace_packages, setup - -with open("README.md") as f: - desc = f.read() - -install_requires = [ - "stac-fastapi.core==2.0.0", - "elasticsearch[async]==8.11.0", - "elasticsearch-dsl==8.11.0", - "uvicorn", - "starlette", -] - -extra_reqs = { - "dev": [ - "pytest", - "pytest-cov", - "pytest-asyncio", - "pre-commit", - "requests", - "ciso8601", - "httpx", - ], - "docs": ["mkdocs", "mkdocs-material", "pdocs"], - "server": ["uvicorn[standard]==0.19.0"], -} - -setup( - name="stac-fastapi.elasticsearch", - description="An implementation of STAC API based on the FastAPI framework with both Elasticsearch and Opensearch.", - long_description=desc, - long_description_content_type="text/markdown", - python_requires=">=3.8", - classifiers=[ - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: MIT License", - ], - url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", - license="MIT", - packages=find_namespace_packages(exclude=["alembic", "tests", "scripts"]), - zip_safe=False, - install_requires=install_requires, - tests_require=extra_reqs["dev"], - extras_require=extra_reqs, - entry_points={ - "console_scripts": [ - "stac-fastapi-elasticsearch=stac_fastapi.elasticsearch.app:run" - ] - }, -) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py deleted file mode 100644 index dbb6116a..00000000 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""elasticsearch submodule.""" diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py deleted file mode 100644 index 0d896534..00000000 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ /dev/null @@ -1,109 +0,0 @@ -"""FastAPI application.""" - -from stac_fastapi.api.app import StacApi -from stac_fastapi.api.models import create_get_request_model, create_post_request_model -from stac_fastapi.core.core import ( - BulkTransactionsClient, - CoreClient, - EsAsyncBaseFiltersClient, - TransactionsClient, -) -from stac_fastapi.core.extensions import QueryExtension -from stac_fastapi.core.session import Session -from stac_fastapi.elasticsearch.config import ElasticsearchSettings -from stac_fastapi.elasticsearch.database_logic import ( - DatabaseLogic, - 
create_collection_index, -) -from stac_fastapi.extensions.core import ( - ContextExtension, - FieldsExtension, - FilterExtension, - SortExtension, - TokenPaginationExtension, - TransactionExtension, -) -from stac_fastapi.extensions.third_party import BulkTransactionExtension - -settings = ElasticsearchSettings() -session = Session.create_from_settings(settings) - -filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) -filter_extension.conformance_classes.append( - "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" -) - -database_logic = DatabaseLogic() - -extensions = [ - TransactionExtension( - client=TransactionsClient( - database=database_logic, session=session, settings=settings - ), - settings=settings, - ), - BulkTransactionExtension( - client=BulkTransactionsClient( - database=database_logic, - session=session, - settings=settings, - ) - ), - FieldsExtension(), - QueryExtension(), - SortExtension(), - TokenPaginationExtension(), - ContextExtension(), - filter_extension, -] - -post_request_model = create_post_request_model(extensions) - -api = StacApi( - settings=settings, - extensions=extensions, - client=CoreClient( - database=database_logic, session=session, post_request_model=post_request_model - ), - search_get_request_model=create_get_request_model(extensions), - search_post_request_model=post_request_model, -) -app = api.app - - -@app.on_event("startup") -async def _startup_event() -> None: - await create_collection_index() - - -def run() -> None: - """Run app from command line using uvicorn if available.""" - try: - import uvicorn - - uvicorn.run( - "stac_fastapi.elasticsearch.app:app", - host=settings.app_host, - port=settings.app_port, - log_level="info", - reload=settings.reload, - ) - except ImportError: - raise RuntimeError("Uvicorn must be installed in order to use command") - - -if __name__ == "__main__": - run() - - -def create_handler(app): - """Create a handler to use with AWS Lambda if mangum available.""" - try: - from mangum import Mangum - - return Mangum(app) - except ImportError: - return None - - -handler = create_handler(app) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py deleted file mode 100644 index 10cf95e9..00000000 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py +++ /dev/null @@ -1,80 +0,0 @@ -"""API configuration.""" -import os -import ssl -from typing import Any, Dict, Set - -from elasticsearch import AsyncElasticsearch, Elasticsearch # type: ignore -from stac_fastapi.types.config import ApiSettings - - -def _es_config() -> Dict[str, Any]: - # Determine the scheme (http or https) - use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true" - scheme = "https" if use_ssl else "http" - - # Configure the hosts parameter with the correct scheme - hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"] - - # Initialize the configuration dictionary - config = { - "hosts": hosts, - "headers": {"accept": "application/vnd.elasticsearch+json; compatible-with=7"}, - } - - # Explicitly exclude SSL settings when not using SSL - if not use_ssl: - return config - - # Include SSL settings if using https - config["ssl_version"] = ssl.TLSVersion.TLSv1_3 # type: ignore - config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false" # type: ignore - - # Include CA Certificates if verifying certs - if config["verify_certs"]: - config["ca_certs"] = os.getenv( - "CURL_CA_BUNDLE", 
"/etc/ssl/certs/ca-certificates.crt" - ) - - # Handle authentication - if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")): - config["http_auth"] = (u, p) - - if api_key := os.getenv("ES_API_KEY"): - if isinstance(config["headers"], dict): - headers = {**config["headers"], "x-api-key": api_key} - - else: - config["headers"] = {"x-api-key": api_key} - - config["headers"] = headers - - return config - - -_forbidden_fields: Set[str] = {"type"} - - -class ElasticsearchSettings(ApiSettings): - """API settings.""" - - # Fields which are defined by STAC but not included in the database model - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self): - """Create es client.""" - return Elasticsearch(**_es_config()) - - -class AsyncElasticsearchSettings(ApiSettings): - """API settings.""" - - # Fields which are defined by STAC but not included in the database model - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self): - """Create async elasticsearch client.""" - return AsyncElasticsearch(**_es_config()) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py deleted file mode 100644 index 87ca8916..00000000 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ /dev/null @@ -1,894 +0,0 @@ -"""Database logic.""" -import asyncio -import logging -import os -from base64 import urlsafe_b64decode, urlsafe_b64encode -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union - -import attr -from elasticsearch_dsl import Q, Search - -from elasticsearch import exceptions, helpers # type: ignore -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import bbox2polygon -from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings -from stac_fastapi.elasticsearch.config import ( - ElasticsearchSettings as SyncElasticsearchSettings, -) -from stac_fastapi.types.errors import ConflictError, NotFoundError -from stac_fastapi.types.stac import Collection, Item - -logger = logging.getLogger(__name__) - -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") -ES_INDEX_NAME_UNSUPPORTED_CHARS = { - "\\", - "/", - "*", - "?", - '"', - "<", - ">", - "|", - " ", - ",", - "#", - ":", -} - -ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" - -DEFAULT_SORT = { - "properties.datetime": {"order": "desc"}, - "id": {"order": "desc"}, - "collection": {"order": "desc"}, -} - -ES_ITEMS_SETTINGS = { - "index": { - "sort.field": list(DEFAULT_SORT.keys()), - "sort.order": [v["order"] for v in DEFAULT_SORT.values()], - } -} - -ES_MAPPINGS_DYNAMIC_TEMPLATES = [ - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - { - "descriptions": { - "match_mapping_type": "string", - "match": "description", - "mapping": {"type": "text"}, - } - }, - { - "titles": { - "match_mapping_type": "string", - "match": "title", - "mapping": {"type": "text"}, - } - }, - # Projection Extension https://github.com/stac-extensions/projection - {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, - { - "proj_projjson": { - "match": 
"proj:projjson", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "proj_centroid": { - "match": "proj:centroid", - "mapping": {"type": "geo_point"}, - } - }, - { - "proj_geometry": { - "match": "proj:geometry", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "no_index_href": { - "match": "href", - "mapping": {"type": "text", "index": False}, - } - }, - # Default all other strings not otherwise specified to keyword - {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, - {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, -] - -ES_ITEMS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "collection": {"type": "keyword"}, - "geometry": {"type": "geo_shape"}, - "assets": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "properties": { - "type": "object", - "properties": { - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": "date"}, - "start_datetime": {"type": "date"}, - "end_datetime": {"type": "date"}, - "created": {"type": "date"}, - "updated": {"type": "date"}, - # Satellite Extension https://github.com/stac-extensions/sat - "sat:absolute_orbit": {"type": "integer"}, - "sat:relative_orbit": {"type": "integer"}, - }, - }, - }, -} - -ES_COLLECTIONS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, - "extent.temporal.interval": {"type": "date"}, - "providers": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "item_assets": {"type": "object", "enabled": False}, - }, -} - - -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. - """ - if collection_ids is None: - return ITEM_INDICES - else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) - - -async def create_collection_index() -> None: - """ - Create the index for a Collection. - - Returns: - None - - """ - client = AsyncElasticsearchSettings().create_client - - await client.options(ignore_status=400).indices.create( - index=f"{COLLECTIONS_INDEX}-000001", - aliases={COLLECTIONS_INDEX: {}}, - mappings=ES_COLLECTIONS_MAPPINGS, - ) - await client.close() - - -async def create_item_index(collection_id: str): - """ - Create the index for Items. - - Args: - collection_id (str): Collection identifier. 
- - Returns: - None - - """ - client = AsyncElasticsearchSettings().create_client - index_name = index_by_collection_id(collection_id) - - await client.options(ignore_status=400).indices.create( - index=f"{index_by_collection_id(collection_id)}-000001", - aliases={index_name: {}}, - mappings=ES_ITEMS_MAPPINGS, - settings=ES_ITEMS_SETTINGS, - ) - await client.close() - - -async def delete_item_index(collection_id: str): - """Delete the index for items in a collection. - - Args: - collection_id (str): The ID of the collection whose items index will be deleted. - """ - client = AsyncElasticsearchSettings().create_client - - name = index_by_collection_id(collection_id) - resolved = await client.indices.resolve_index(name=name) - if "aliases" in resolved and resolved["aliases"]: - [alias] = resolved["aliases"] - await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) - await client.indices.delete(index=alias["indices"]) - else: - await client.indices.delete(index=name) - await client.close() - - -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]): - """Create Elasticsearch bulk actions for a list of processed items. - - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. - - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. - """ - return [ - { - "_index": index_by_collection_id(collection_id), - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, - } - for item in processed_items - ] - - -# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, -# So create our own Protocol for typing -# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - -@attr.s -class DatabaseLogic: - """Database logic.""" - - client = AsyncElasticsearchSettings().create_client - sync_client = SyncElasticsearchSettings().create_client - - item_serializer: Type[ItemSerializer] = attr.ib(default=ItemSerializer) - collection_serializer: Type[CollectionSerializer] = attr.ib( - default=CollectionSerializer - ) - - """CORE LOGIC""" - - async def get_all_collections( - self, token: Optional[str], limit: int, base_url: str - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - """Retrieve a list of all collections from Elasticsearch, supporting pagination. - - Args: - token (Optional[str]): The pagination token. - limit (int): The number of results to return. - - Returns: - A tuple of (collections, next pagination token if any). 
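        Example (a self-contained sketch, not part of the original module):
        the page body is sorted by "id" ascending, and the next_token handed
        back to the caller is simply the last hit's sort value, so resuming a
        listing is a matter of echoing that value into "search_after". Note
        that the OpenSearch variant later in this patch only includes
        "search_after" when a token is present, rather than passing None.

            def collections_page_body(limit, token=None):
                body = {"sort": [{"id": {"order": "asc"}}], "size": limit}
                if token:
                    body["search_after"] = [token]  # resume after the last id seen
                return body

            assert "search_after" not in collections_page_body(10)
            # "sentinel-2-l2a" is a hypothetical collection id
            assert collections_page_body(10, "sentinel-2-l2a")["search_after"] == ["sentinel-2-l2a"]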
- """ - search_after = None - if token: - search_after = [token] - - response = await self.client.search( - index=COLLECTIONS_INDEX, - body={ - "sort": [{"id": {"order": "asc"}}], - "size": limit, - "search_after": search_after, - }, - ) - - hits = response["hits"]["hits"] - collections = [ - self.collection_serializer.db_to_stac( - collection=hit["_source"], base_url=base_url - ) - for hit in hits - ] - - next_token = None - if len(hits) == limit: - next_token = hits[-1]["sort"][0] - - return collections, next_token - - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: - """Retrieve a single item from the database. - - Args: - collection_id (str): The id of the Collection that the Item belongs to. - item_id (str): The id of the Item. - - Returns: - item (Dict): A dictionary containing the source data for the Item. - - Raises: - NotFoundError: If the specified Item does not exist in the Collection. - - Notes: - The Item is retrieved from the Elasticsearch database using the `client.get` method, - with the index for the Collection as the target index and the combined `mk_item_id` as the document id. - """ - try: - item = await self.client.get( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" - ) - return item["_source"] - - @staticmethod - def make_search(): - """Database logic to create a Search instance.""" - return Search().sort(*DEFAULT_SORT) - - @staticmethod - def apply_ids_filter(search: Search, item_ids: List[str]): - """Database logic to search a list of STAC item ids.""" - return search.filter("terms", id=item_ids) - - @staticmethod - def apply_collections_filter(search: Search, collection_ids: List[str]): - """Database logic to search a list of STAC collection ids.""" - return search.filter("terms", collection=collection_ids) - - @staticmethod - def apply_datetime_filter(search: Search, datetime_search): - """Apply a filter to search based on datetime field. - - Args: - search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. - - Returns: - Search: The filtered search object. - """ - if "eq" in datetime_search: - search = search.filter( - "term", **{"properties__datetime": datetime_search["eq"]} - ) - else: - search = search.filter( - "range", properties__datetime={"lte": datetime_search["lte"]} - ) - search = search.filter( - "range", properties__datetime={"gte": datetime_search["gte"]} - ) - return search - - @staticmethod - def apply_bbox_filter(search: Search, bbox: List): - """Filter search results based on bounding box. - - Args: - search (Search): The search object to apply the filter to. - bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. - - Returns: - search (Search): The search object with the bounding box filter applied. - - Notes: - The bounding box is transformed into a polygon using the `bbox2polygon` function and - a geo_shape filter is added to the search object, set to intersect with the specified polygon. - """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": "polygon", - "coordinates": bbox2polygon(*bbox), - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_intersects_filter( - search: Search, - intersects: Geometry, - ): - """Filter search results based on intersecting geometry. 
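        Example (illustrative sketch only): both this filter and
        apply_bbox_filter above wrap the same raw geo_shape query. The local
        bbox2polygon_sketch below is an assumption standing in for
        stac_fastapi.core.utilities.bbox2polygon (a single closed GeoJSON
        ring), used here only to keep the example self-contained.

            def bbox2polygon_sketch(minx, miny, maxx, maxy):
                # close the ring by repeating the first vertex, per GeoJSON
                return [[[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy], [minx, miny]]]

            query = {
                "geo_shape": {
                    "geometry": {
                        "shape": {
                            "type": "polygon",
                            "coordinates": bbox2polygon_sketch(-10.0, 40.0, 5.0, 50.0),
                        },
                        "relation": "intersects",
                    }
                }
            }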
- - Args: - search (Search): The search object to apply the filter to. - intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. - - Returns: - search (Search): The search object with the intersecting geometry filter applied. - - Notes: - A geo_shape filter is added to the search object, set to intersect with the specified geometry. - """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": intersects.type.lower(), - "coordinates": intersects.coordinates, - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_stacql_filter(search: Search, op: str, field: str, value: float): - """Filter search results based on a comparison between a field and a value. - - Args: - search (Search): The search object to apply the filter to. - op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), - 'lt' (less than), or 'lte' (less than or equal). - field (str): The field to perform the comparison on. - value (float): The value to compare the field against. - - Returns: - search (Search): The search object with the specified filter applied. - """ - if op != "eq": - key_filter = {field: {f"{op}": value}} - search = search.filter(Q("range", **key_filter)) - else: - search = search.filter("term", **{field: value}) - - return search - - @staticmethod - def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): - """Database logic to perform query for search endpoint.""" - if _filter is not None: - search = search.filter(filter.Clause.parse_obj(_filter).to_es()) - return search - - @staticmethod - def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: - """Database logic to sort search instance.""" - if sortby: - return {s.field: {"order": s.direction} for s in sortby} - else: - return None - - async def execute_search( - self, - search: Search, - limit: int, - token: Optional[str], - sort: Optional[Dict[str, Dict[str, str]]], - collection_ids: Optional[List[str]], - ignore_unavailable: bool = True, - ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: - """Execute a search query with limit and other optional parameters. - - Args: - search (Search): The search query to be executed. - limit (int): The maximum number of results to be returned. - token (Optional[str]): The token used to return the next set of results. - sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. - collection_ids (Optional[List[str]]): The collection ids to search. - ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. - - Returns: - Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: - - An iterable of search results, where each result is a dictionary with keys and values representing the - fields and values of each document. - - The total number of results (if the count could be computed), or None if the count could not be - computed. - - The token to be used to retrieve the next set of results, or None if there are no more results. - - Raises: - NotFoundError: If the collections specified in `collection_ids` do not exist. 
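        Example: the pagination token is nothing more than the last hit's
        "sort" array, comma-joined and URL-safe base64 encoded, as the
        implementation below shows. A round-trip sketch (the sort values are
        hypothetical):

            from base64 import urlsafe_b64decode, urlsafe_b64encode

            sort_values = ["2020-02-12T12:30:22Z", "item-1", "collection-a"]
            token = urlsafe_b64encode(",".join(str(x) for x in sort_values).encode()).decode()
            assert urlsafe_b64decode(token.encode()).decode().split(",") == sort_values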
- """ - search_after = None - if token: - search_after = urlsafe_b64decode(token.encode()).decode().split(",") - - query = search.query.to_dict() if search.query else None - - index_param = indices(collection_ids) - - search_task = asyncio.create_task( - self.client.search( - index=index_param, - ignore_unavailable=ignore_unavailable, - query=query, - sort=sort or DEFAULT_SORT, - search_after=search_after, - size=limit, - ) - ) - - count_task = asyncio.create_task( - self.client.count( - index=index_param, - ignore_unavailable=ignore_unavailable, - body=search.to_dict(count=True), - ) - ) - - try: - es_response = await search_task - except exceptions.NotFoundError: - raise NotFoundError(f"Collections '{collection_ids}' do not exist") - - hits = es_response["hits"]["hits"] - items = (hit["_source"] for hit in hits) - - next_token = None - if hits and (sort_array := hits[-1].get("sort")): - next_token = urlsafe_b64encode( - ",".join([str(x) for x in sort_array]).encode() - ).decode() - - # (1) count should not block returning results, so don't wait for it to be done - # (2) don't cancel the task so that it will populate the ES cache for subsequent counts - maybe_count = None - if count_task.done(): - try: - maybe_count = count_task.result().get("count") - except Exception as e: - logger.error(f"Count task failed: {e}") - - return items, maybe_count, next_token - - """ TRANSACTION LOGIC """ - - async def check_collection_exists(self, collection_id: str): - """Database logic to check if a collection exists.""" - if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - async def prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Preps an item for insertion into the database. - - Args: - item (Item): The item to be prepped for insertion. - base_url (str): The base URL used to create the item's self URL. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The prepped item. - - Raises: - ConflictError: If the item already exists in the database. - - """ - await self.check_collection_exists(collection_id=item["collection"]) - - if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), - id=mk_item_id(item["id"], item["collection"]), - ): - raise ConflictError( - f"Item {item['id']} in collection {item['collection']} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - def sync_prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Prepare an item for insertion into the database. - - This method performs pre-insertion preparation on the given `item`, - such as checking if the collection the item belongs to exists, - and optionally verifying that an item with the same ID does not already exist in the database. - - Args: - item (Item): The item to be inserted into the database. - base_url (str): The base URL used for constructing URLs for the item. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The item after preparation is done. - - Raises: - NotFoundError: If the collection that the item belongs to does not exist in the database. - ConflictError: If an item with the same ID already exists in the collection. 
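        Example (sketch): items are keyed by mk_item_id, i.e.
        "<item_id>|<collection_id>", so the duplicate guard above reduces to a
        single exists() call per item. ValueError stands in for ConflictError
        to keep the sketch self-contained, and the index name is simplified
        (the real helper also strips unsupported characters):

            def ensure_new_item(sync_client, item, exist_ok=False):
                doc_id = f"{item['id']}|{item['collection']}"   # mk_item_id
                index = f"items_{item['collection'].lower()}"   # index_by_collection_id, simplified
                if not exist_ok and sync_client.exists(index=index, id=doc_id):
                    raise ValueError(f"Item {item['id']} already exists in {item['collection']}")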
- """ - item_id = item["id"] - collection_id = item["collection"] - if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ): - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - async def create_item(self, item: Item, refresh: bool = False): - """Database logic for creating one item. - - Args: - item (Item): The item to be created. - refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. - - Raises: - ConflictError: If the item already exists in the database. - - Returns: - None - """ - # todo: check if collection exists, but cache - item_id = item["id"] - collection_id = item["collection"] - es_resp = await self.client.index( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - document=item, - refresh=refresh, - ) - - if (meta := es_resp.get("meta")) and meta.get("status") == 409: - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - async def delete_item( - self, item_id: str, collection_id: str, refresh: bool = False - ): - """Delete a single item from the database. - - Args: - item_id (str): The id of the Item to be deleted. - collection_id (str): The id of the Collection that the Item belongs to. - refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. - - Raises: - NotFoundError: If the Item does not exist in the database. - """ - try: - await self.client.delete( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - refresh=refresh, - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} in collection {collection_id} not found" - ) - - async def create_collection(self, collection: Collection, refresh: bool = False): - """Create a single collection in the database. - - Args: - collection (Collection): The Collection object to be created. - refresh (bool, optional): Whether to refresh the index after the creation. Default is False. - - Raises: - ConflictError: If a Collection with the same id already exists in the database. - - Notes: - A new index is created for the items in the Collection using the `create_item_index` function. - """ - collection_id = collection["id"] - - if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise ConflictError(f"Collection {collection_id} already exists") - - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - document=collection, - refresh=refresh, - ) - - await create_item_index(collection_id) - - async def find_collection(self, collection_id: str) -> Collection: - """Find and return a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be found. - - Returns: - Collection: The found collection, represented as a `Collection` object. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function searches for a collection in the database using the specified `collection_id` and returns the found - collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised. 
- """ - try: - collection = await self.client.get( - index=COLLECTIONS_INDEX, id=collection_id - ) - except exceptions.NotFoundError: - raise NotFoundError(f"Collection {collection_id} not found") - - return collection["_source"] - - async def update_collection( - self, collection_id: str, collection: Collection, refresh: bool = False - ): - """Update a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be updated. - collection (Collection): The Collection object to be used for the update. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not - found in the database. - - Notes: - This function updates the collection in the database using the specified - `collection_id` and with the collection specified in the `Collection` object. - If the collection is not found, a `NotFoundError` is raised. - """ - await self.find_collection(collection_id=collection_id) - - if collection_id != collection["id"]: - await self.create_collection(collection, refresh=refresh) - - await self.client.reindex( - body={ - "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, - "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, - "script": { - "lang": "painless", - "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", - }, - }, - wait_for_completion=True, - refresh=refresh, - ) - - await self.delete_collection(collection_id) - - else: - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - document=collection, - refresh=refresh, - ) - - async def delete_collection(self, collection_id: str, refresh: bool = False): - """Delete a collection from the database. - - Parameters: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be deleted. - refresh (bool): Whether to refresh the index after the deletion (default: False). - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function first verifies that the collection with the specified `collection_id` exists in the database, and then - deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this - function also calls `delete_item_index` to delete the index for the items in the collection. - """ - await self.find_collection(collection_id=collection_id) - await self.client.delete( - index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh - ) - await delete_item_index(collection_id) - - async def bulk_async( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database asynchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The - `mk_actions` function is called to generate a list of actions for the bulk insert. 
If `refresh` is set to True, the - index is refreshed after the bulk insert. The function does not return any value. - """ - await helpers.async_bulk( - self.client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - def bulk_sync( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database synchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed synchronously and blocking, meaning that the function does not return until the insert has - completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to - True, the index is refreshed after the bulk insert. The function does not return any value. - """ - helpers.bulk( - self.sync_client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - # DANGER - async def delete_items(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=ITEM_INDICES, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) - - # DANGER - async def delete_collections(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=COLLECTIONS_INDEX, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py deleted file mode 100644 index 6b648e2b..00000000 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py +++ /dev/null @@ -1,2 +0,0 @@ -"""library version.""" -__version__ = "2.0.0" diff --git a/stac_fastapi/opensearch/README.md b/stac_fastapi/opensearch/README.md deleted file mode 100644 index 6b1f8391..00000000 --- a/stac_fastapi/opensearch/README.md +++ /dev/null @@ -1 +0,0 @@ -# stac-fastapi-opensearch \ No newline at end of file diff --git a/stac_fastapi/opensearch/pytest.ini b/stac_fastapi/opensearch/pytest.ini deleted file mode 100644 index db0353ef..00000000 --- a/stac_fastapi/opensearch/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -testpaths = tests -addopts = -sv -asyncio_mode = auto \ No newline at end of file diff --git a/stac_fastapi/opensearch/setup.cfg b/stac_fastapi/opensearch/setup.cfg deleted file mode 100644 index 9f0be4b7..00000000 --- a/stac_fastapi/opensearch/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -version = attr: stac_fastapi.opensearch.version.__version__ diff --git a/stac_fastapi/opensearch/setup.py b/stac_fastapi/opensearch/setup.py deleted file mode 100644 index 224e733b..00000000 --- a/stac_fastapi/opensearch/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -"""stac_fastapi: opensearch module.""" - -from setuptools import find_namespace_packages, setup - -with open("README.md") as f: - desc = f.read() - -install_requires = [ - "stac-fastapi.core==2.0.0", - "opensearch-py==2.4.2", - "opensearch-py[async]==2.4.2", - "uvicorn", - "starlette", -] - -extra_reqs = { - "dev": [ - "pytest", - "pytest-cov", - "pytest-asyncio", - 
"pre-commit", - "requests", - "ciso8601", - "httpx", - ], - "docs": ["mkdocs", "mkdocs-material", "pdocs"], - "server": ["uvicorn[standard]==0.19.0"], -} - -setup( - name="stac-fastapi.opensearch", - description="Opensearch stac-fastapi backend.", - long_description=desc, - long_description_content_type="text/markdown", - python_requires=">=3.8", - classifiers=[ - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Intended Audience :: Science/Research", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "License :: OSI Approved :: MIT License", - ], - url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", - license="MIT", - packages=find_namespace_packages(), - zip_safe=False, - install_requires=install_requires, - extras_require=extra_reqs, - entry_points={ - "console_scripts": ["stac-fastapi-opensearch=stac_fastapi.opensearch.app:run"] - }, -) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py deleted file mode 100644 index 342b8919..00000000 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""opensearch submodule.""" diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py deleted file mode 100644 index ebb2921e..00000000 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ /dev/null @@ -1,109 +0,0 @@ -"""FastAPI application.""" - -from stac_fastapi.api.app import StacApi -from stac_fastapi.api.models import create_get_request_model, create_post_request_model -from stac_fastapi.core.core import ( - BulkTransactionsClient, - CoreClient, - EsAsyncBaseFiltersClient, - TransactionsClient, -) -from stac_fastapi.core.extensions import QueryExtension -from stac_fastapi.core.session import Session -from stac_fastapi.extensions.core import ( - ContextExtension, - FieldsExtension, - FilterExtension, - SortExtension, - TokenPaginationExtension, - TransactionExtension, -) -from stac_fastapi.extensions.third_party import BulkTransactionExtension -from stac_fastapi.opensearch.config import OpensearchSettings -from stac_fastapi.opensearch.database_logic import ( - DatabaseLogic, - create_collection_index, -) - -settings = OpensearchSettings() -session = Session.create_from_settings(settings) - -filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) -filter_extension.conformance_classes.append( - "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" -) - -database_logic = DatabaseLogic() - -extensions = [ - TransactionExtension( - client=TransactionsClient( - database=database_logic, session=session, settings=settings - ), - settings=settings, - ), - BulkTransactionExtension( - client=BulkTransactionsClient( - database=database_logic, - session=session, - settings=settings, - ) - ), - FieldsExtension(), - QueryExtension(), - SortExtension(), - TokenPaginationExtension(), - ContextExtension(), - filter_extension, -] - -post_request_model = create_post_request_model(extensions) - -api = StacApi( - settings=settings, - extensions=extensions, - client=CoreClient( - database=database_logic, session=session, post_request_model=post_request_model - ), - search_get_request_model=create_get_request_model(extensions), - search_post_request_model=post_request_model, -) -app = api.app - - 
-@app.on_event("startup") -async def _startup_event() -> None: - await create_collection_index() - - -def run() -> None: - """Run app from command line using uvicorn if available.""" - try: - import uvicorn - - uvicorn.run( - "stac_fastapi.opensearch.app:app", - host=settings.app_host, - port=settings.app_port, - log_level="info", - reload=settings.reload, - ) - except ImportError: - raise RuntimeError("Uvicorn must be installed in order to use command") - - -if __name__ == "__main__": - run() - - -def create_handler(app): - """Create a handler to use with AWS Lambda if mangum available.""" - try: - from mangum import Mangum - - return Mangum(app) - except ImportError: - return None - - -handler = create_handler(app) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py deleted file mode 100644 index cd34e318..00000000 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py +++ /dev/null @@ -1,81 +0,0 @@ -"""API configuration.""" -import os -import ssl -from typing import Any, Dict, Set - -from opensearchpy import AsyncOpenSearch, OpenSearch - -from stac_fastapi.types.config import ApiSettings - - -def _es_config() -> Dict[str, Any]: - # Determine the scheme (http or https) - use_ssl = os.getenv("ES_USE_SSL", "true").lower() == "true" - scheme = "https" if use_ssl else "http" - - # Configure the hosts parameter with the correct scheme - hosts = [f"{scheme}://{os.getenv('ES_HOST')}:{os.getenv('ES_PORT')}"] - - # Initialize the configuration dictionary - config = { - "hosts": hosts, - "headers": {"accept": "application/json", "Content-Type": "application/json"}, - } - - # Explicitly exclude SSL settings when not using SSL - if not use_ssl: - return config - - # Include SSL settings if using https - config["ssl_version"] = ssl.PROTOCOL_SSLv23 # type: ignore - config["verify_certs"] = os.getenv("ES_VERIFY_CERTS", "true").lower() != "false" # type: ignore - - # Include CA Certificates if verifying certs - if config["verify_certs"]: - config["ca_certs"] = os.getenv( - "CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt" - ) - - # Handle authentication - if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")): - config["http_auth"] = (u, p) - - if api_key := os.getenv("ES_API_KEY"): - if isinstance(config["headers"], dict): - headers = {**config["headers"], "x-api-key": api_key} - - else: - config["headers"] = {"x-api-key": api_key} - - config["headers"] = headers - - return config - - -_forbidden_fields: Set[str] = {"type"} - - -class OpensearchSettings(ApiSettings): - """API settings.""" - - # Fields which are defined by STAC but not included in the database model - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self): - """Create es client.""" - return OpenSearch(**_es_config()) - - -class AsyncOpensearchSettings(ApiSettings): - """API settings.""" - - # Fields which are defined by STAC but not included in the database model - forbidden_fields: Set[str] = _forbidden_fields - indexed_fields: Set[str] = {"datetime"} - - @property - def create_client(self): - """Create async elasticsearch client.""" - return AsyncOpenSearch(**_es_config()) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py deleted file mode 100644 index 0f4bf9cf..00000000 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ /dev/null @@ -1,922 
+0,0 @@ -"""Database logic.""" -import asyncio -import logging -import os -from base64 import urlsafe_b64decode, urlsafe_b64encode -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union - -import attr -from opensearchpy import exceptions, helpers -from opensearchpy.exceptions import TransportError -from opensearchpy.helpers.query import Q -from opensearchpy.helpers.search import Search - -from stac_fastapi.core import serializers -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.utilities import bbox2polygon -from stac_fastapi.opensearch.config import ( - AsyncOpensearchSettings as AsyncSearchSettings, -) -from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings -from stac_fastapi.types.errors import ConflictError, NotFoundError -from stac_fastapi.types.stac import Collection, Item - -logger = logging.getLogger(__name__) - -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") -ES_INDEX_NAME_UNSUPPORTED_CHARS = { - "\\", - "/", - "*", - "?", - '"', - "<", - ">", - "|", - " ", - ",", - "#", - ":", -} - -ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" - -DEFAULT_SORT = { - "properties.datetime": {"order": "desc"}, - "id": {"order": "desc"}, - "collection": {"order": "desc"}, -} - -ES_ITEMS_SETTINGS = { - "index": { - "sort.field": list(DEFAULT_SORT.keys()), - "sort.order": [v["order"] for v in DEFAULT_SORT.values()], - } -} - -ES_MAPPINGS_DYNAMIC_TEMPLATES = [ - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - { - "descriptions": { - "match_mapping_type": "string", - "match": "description", - "mapping": {"type": "text"}, - } - }, - { - "titles": { - "match_mapping_type": "string", - "match": "title", - "mapping": {"type": "text"}, - } - }, - # Projection Extension https://github.com/stac-extensions/projection - {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, - { - "proj_projjson": { - "match": "proj:projjson", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "proj_centroid": { - "match": "proj:centroid", - "mapping": {"type": "geo_point"}, - } - }, - { - "proj_geometry": { - "match": "proj:geometry", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "no_index_href": { - "match": "href", - "mapping": {"type": "text", "index": False}, - } - }, - # Default all other strings not otherwise specified to keyword - {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, - {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, -] - -ES_ITEMS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "collection": {"type": "keyword"}, - "geometry": {"type": "geo_shape"}, - "assets": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "properties": { - "type": "object", - "properties": { - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": "date"}, - "start_datetime": {"type": "date"}, - "end_datetime": {"type": "date"}, - "created": {"type": "date"}, - "updated": {"type": "date"}, - # Satellite Extension https://github.com/stac-extensions/sat - "sat:absolute_orbit": {"type": "integer"}, - "sat:relative_orbit": {"type": "integer"}, - }, - }, - }, -} - 
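# An aside on the _es_config helper in the two config modules removed in this
# patch series (both the Elasticsearch and OpenSearch variants): when
# ES_API_KEY is set, the trailing `config["headers"] = headers` assignment
# runs even on the branch where `headers` was never bound, which would raise
# NameError if config["headers"] were ever not a dict. A corrected sketch of
# just that branch, using the same environment variable:

import os

def merge_api_key(config):
    if api_key := os.getenv("ES_API_KEY"):
        headers = config.get("headers")
        if isinstance(headers, dict):
            config["headers"] = {**headers, "x-api-key": api_key}
        else:
            config["headers"] = {"x-api-key": api_key}
    return config

os.environ["ES_API_KEY"] = "demo-key"  # hypothetical value, for the demo only
assert merge_api_key({})["headers"] == {"x-api-key": "demo-key"}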
-ES_COLLECTIONS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, - "extent.temporal.interval": {"type": "date"}, - "providers": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "item_assets": {"type": "object", "enabled": False}, - }, -} - - -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. - """ - if collection_ids is None: - return ITEM_INDICES - else: - return ",".join([index_by_collection_id(c) for c in collection_ids]) - - -async def create_collection_index() -> None: - """ - Create the index for a Collection. - - Returns: - None - - """ - client = AsyncSearchSettings().create_client - - search_body = { - "mappings": ES_COLLECTIONS_MAPPINGS, - "aliases": {COLLECTIONS_INDEX: {}}, - } - - index = f"{COLLECTIONS_INDEX}-000001" - - try: - await client.indices.create(index=index, body=search_body) - except TransportError as e: - if e.status_code == 400: - pass # Ignore 400 status codes - else: - raise e - - await client.close() - - -async def create_item_index(collection_id: str): - """ - Create the index for Items. - - Args: - collection_id (str): Collection identifier. - - Returns: - None - - """ - client = AsyncSearchSettings().create_client - index_name = index_by_collection_id(collection_id) - search_body = { - "aliases": {index_name: {}}, - "mappings": ES_ITEMS_MAPPINGS, - "settings": ES_ITEMS_SETTINGS, - } - - try: - await client.indices.create(index=f"{index_name}-000001", body=search_body) - except TransportError as e: - if e.status_code == 400: - pass # Ignore 400 status codes - else: - raise e - - await client.close() - - -async def delete_item_index(collection_id: str): - """Delete the index for items in a collection. - - Args: - collection_id (str): The ID of the collection whose items index will be deleted. - """ - client = AsyncSearchSettings().create_client - - name = index_by_collection_id(collection_id) - resolved = await client.indices.resolve_index(name=name) - if "aliases" in resolved and resolved["aliases"]: - [alias] = resolved["aliases"] - await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) - await client.indices.delete(index=alias["indices"]) - else: - await client.indices.delete(index=name) - await client.close() - - -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. 
- """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]): - """Create Elasticsearch bulk actions for a list of processed items. - - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. - - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. - """ - return [ - { - "_index": index_by_collection_id(collection_id), - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, - } - for item in processed_items - ] - - -# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, -# So create our own Protocol for typing -# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - -@attr.s -class DatabaseLogic: - """Database logic.""" - - client = AsyncSearchSettings().create_client - sync_client = SyncSearchSettings().create_client - - item_serializer: Type[serializers.ItemSerializer] = attr.ib( - default=serializers.ItemSerializer - ) - collection_serializer: Type[serializers.CollectionSerializer] = attr.ib( - default=serializers.CollectionSerializer - ) - - """CORE LOGIC""" - - async def get_all_collections( - self, token: Optional[str], limit: int, base_url: str - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - """ - Retrieve a list of all collections from Opensearch, supporting pagination. - - Args: - token (Optional[str]): The pagination token. - limit (int): The number of results to return. - - Returns: - A tuple of (collections, next pagination token if any). - """ - search_body = { - "sort": [{"id": {"order": "asc"}}], - "size": limit, - } - - # Only add search_after to the query if token is not None and not empty - if token: - search_after = [token] - search_body["search_after"] = search_after - - response = await self.client.search( - index="collections", - body=search_body, - ) - - hits = response["hits"]["hits"] - collections = [ - self.collection_serializer.db_to_stac( - collection=hit["_source"], base_url=base_url - ) - for hit in hits - ] - - next_token = None - if len(hits) == limit: - # Ensure we have a valid sort value for next_token - next_token_values = hits[-1].get("sort") - if next_token_values: - next_token = next_token_values[0] - - return collections, next_token - - async def get_one_item(self, collection_id: str, item_id: str) -> Dict: - """Retrieve a single item from the database. - - Args: - collection_id (str): The id of the Collection that the Item belongs to. - item_id (str): The id of the Item. - - Returns: - item (Dict): A dictionary containing the source data for the Item. - - Raises: - NotFoundError: If the specified Item does not exist in the Collection. - - Notes: - The Item is retrieved from the Elasticsearch database using the `client.get` method, - with the index for the Collection as the target index and the combined `mk_item_id` as the document id. 
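        Example: the document id is mk_item_id(item_id, collection_id), the
        same key mk_actions above uses when building bulk actions. A sketch
        with a pruned, hypothetical item:

            item = {"id": "item-1", "collection": "demo"}
            action = {
                "_index": "items_demo",   # index_by_collection_id("demo")
                "_id": "item-1|demo",     # mk_item_id(item["id"], item["collection"])
                "_source": item,
            }
            assert action["_id"] == f"{item['id']}|{item['collection']}"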
- """ - try: - item = await self.client.get( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" - ) - return item["_source"] - - @staticmethod - def make_search(): - """Database logic to create a Search instance.""" - return Search().sort(*DEFAULT_SORT) - - @staticmethod - def apply_ids_filter(search: Search, item_ids: List[str]): - """Database logic to search a list of STAC item ids.""" - return search.filter("terms", id=item_ids) - - @staticmethod - def apply_collections_filter(search: Search, collection_ids: List[str]): - """Database logic to search a list of STAC collection ids.""" - return search.filter("terms", collection=collection_ids) - - @staticmethod - def apply_datetime_filter(search: Search, datetime_search): - """Apply a filter to search based on datetime field. - - Args: - search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. - - Returns: - Search: The filtered search object. - """ - if "eq" in datetime_search: - search = search.filter( - "term", **{"properties__datetime": datetime_search["eq"]} - ) - else: - search = search.filter( - "range", properties__datetime={"lte": datetime_search["lte"]} - ) - search = search.filter( - "range", properties__datetime={"gte": datetime_search["gte"]} - ) - return search - - @staticmethod - def apply_bbox_filter(search: Search, bbox: List): - """Filter search results based on bounding box. - - Args: - search (Search): The search object to apply the filter to. - bbox (List): The bounding box coordinates, represented as a list of four values [minx, miny, maxx, maxy]. - - Returns: - search (Search): The search object with the bounding box filter applied. - - Notes: - The bounding box is transformed into a polygon using the `bbox2polygon` function and - a geo_shape filter is added to the search object, set to intersect with the specified polygon. - """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": "polygon", - "coordinates": bbox2polygon(*bbox), - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_intersects_filter( - search: Search, - intersects: Geometry, - ): - """Filter search results based on intersecting geometry. - - Args: - search (Search): The search object to apply the filter to. - intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. - - Returns: - search (Search): The search object with the intersecting geometry filter applied. - - Notes: - A geo_shape filter is added to the search object, set to intersect with the specified geometry. - """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": intersects.type.lower(), - "coordinates": intersects.coordinates, - }, - "relation": "intersects", - } - } - } - ) - ) - - @staticmethod - def apply_stacql_filter(search: Search, op: str, field: str, value: float): - """Filter search results based on a comparison between a field and a value. - - Args: - search (Search): The search object to apply the filter to. - op (str): The comparison operator to use. Can be 'eq' (equal), 'gt' (greater than), 'gte' (greater than or equal), - 'lt' (less than), or 'lte' (less than or equal). - field (str): The field to perform the comparison on. - value (float): The value to compare the field against. 
- - Returns: - search (Search): The search object with the specified filter applied. - """ - if op != "eq": - key_filter = {field: {f"{op}": value}} - search = search.filter(Q("range", **key_filter)) - else: - search = search.filter("term", **{field: value}) - - return search - - @staticmethod - def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): - """Database logic to perform query for search endpoint.""" - if _filter is not None: - search = search.filter(filter.Clause.parse_obj(_filter).to_es()) - return search - - @staticmethod - def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: - """Database logic to sort search instance.""" - if sortby: - return {s.field: {"order": s.direction} for s in sortby} - else: - return None - - async def execute_search( - self, - search: Search, - limit: int, - token: Optional[str], - sort: Optional[Dict[str, Dict[str, str]]], - collection_ids: Optional[List[str]], - ignore_unavailable: bool = True, - ) -> Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: - """Execute a search query with limit and other optional parameters. - - Args: - search (Search): The search query to be executed. - limit (int): The maximum number of results to be returned. - token (Optional[str]): The token used to return the next set of results. - sort (Optional[Dict[str, Dict[str, str]]]): Specifies how the results should be sorted. - collection_ids (Optional[List[str]]): The collection ids to search. - ignore_unavailable (bool, optional): Whether to ignore unavailable collections. Defaults to True. - - Returns: - Tuple[Iterable[Dict[str, Any]], Optional[int], Optional[str]]: A tuple containing: - - An iterable of search results, where each result is a dictionary with keys and values representing the - fields and values of each document. - - The total number of results (if the count could be computed), or None if the count could not be - computed. - - The token to be used to retrieve the next set of results, or None if there are no more results. - - Raises: - NotFoundError: If the collections specified in `collection_ids` do not exist. 
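        Example: the sort mapping produced by populate_sort above plugs
        straight into search_body["sort"] below, with DEFAULT_SORT as the
        fallback. A sketch, using SimpleNamespace as a stand-in for the
        parsed sortby clauses:

            from types import SimpleNamespace

            sortby = [SimpleNamespace(field="properties.datetime", direction="desc")]
            sort = {s.field: {"order": s.direction} for s in sortby}
            assert sort == {"properties.datetime": {"order": "desc"}}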
- """ - search_body: Dict[str, Any] = {} - query = search.query.to_dict() if search.query else None - if query: - search_body["query"] = query - if token: - search_after = urlsafe_b64decode(token.encode()).decode().split(",") - search_body["search_after"] = search_after - search_body["sort"] = sort if sort else DEFAULT_SORT - - index_param = indices(collection_ids) - - search_task = asyncio.create_task( - self.client.search( - index=index_param, - ignore_unavailable=ignore_unavailable, - body=search_body, - size=limit, - ) - ) - - count_task = asyncio.create_task( - self.client.count( - index=index_param, - ignore_unavailable=ignore_unavailable, - body=search.to_dict(count=True), - ) - ) - - try: - es_response = await search_task - except exceptions.NotFoundError: - raise NotFoundError(f"Collections '{collection_ids}' do not exist") - - hits = es_response["hits"]["hits"] - items = (hit["_source"] for hit in hits) - - next_token = None - if hits and (sort_array := hits[-1].get("sort")): - next_token = urlsafe_b64encode( - ",".join([str(x) for x in sort_array]).encode() - ).decode() - - # (1) count should not block returning results, so don't wait for it to be done - # (2) don't cancel the task so that it will populate the ES cache for subsequent counts - maybe_count = None - if count_task.done(): - try: - maybe_count = count_task.result().get("count") - except Exception as e: - logger.error(f"Count task failed: {e}") - - return items, maybe_count, next_token - - """ TRANSACTION LOGIC """ - - async def check_collection_exists(self, collection_id: str): - """Database logic to check if a collection exists.""" - if not await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - async def prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Preps an item for insertion into the database. - - Args: - item (Item): The item to be prepped for insertion. - base_url (str): The base URL used to create the item's self URL. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The prepped item. - - Raises: - ConflictError: If the item already exists in the database. - - """ - await self.check_collection_exists(collection_id=item["collection"]) - - if not exist_ok and await self.client.exists( - index=index_by_collection_id(item["collection"]), - id=mk_item_id(item["id"], item["collection"]), - ): - raise ConflictError( - f"Item {item['id']} in collection {item['collection']} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - def sync_prep_create_item( - self, item: Item, base_url: str, exist_ok: bool = False - ) -> Item: - """ - Prepare an item for insertion into the database. - - This method performs pre-insertion preparation on the given `item`, - such as checking if the collection the item belongs to exists, - and optionally verifying that an item with the same ID does not already exist in the database. - - Args: - item (Item): The item to be inserted into the database. - base_url (str): The base URL used for constructing URLs for the item. - exist_ok (bool): Indicates whether the item can exist already. - - Returns: - Item: The item after preparation is done. - - Raises: - NotFoundError: If the collection that the item belongs to does not exist in the database. - ConflictError: If an item with the same ID already exists in the collection. 
- """ - item_id = item["id"] - collection_id = item["collection"] - if not self.sync_client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise NotFoundError(f"Collection {collection_id} does not exist") - - if not exist_ok and self.sync_client.exists( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - ): - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - return self.item_serializer.stac_to_db(item, base_url) - - async def create_item(self, item: Item, refresh: bool = False): - """Database logic for creating one item. - - Args: - item (Item): The item to be created. - refresh (bool, optional): Refresh the index after performing the operation. Defaults to False. - - Raises: - ConflictError: If the item already exists in the database. - - Returns: - None - """ - # todo: check if collection exists, but cache - item_id = item["id"] - collection_id = item["collection"] - es_resp = await self.client.index( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - body=item, - refresh=refresh, - ) - - if (meta := es_resp.get("meta")) and meta.get("status") == 409: - raise ConflictError( - f"Item {item_id} in collection {collection_id} already exists" - ) - - async def delete_item( - self, item_id: str, collection_id: str, refresh: bool = False - ): - """Delete a single item from the database. - - Args: - item_id (str): The id of the Item to be deleted. - collection_id (str): The id of the Collection that the Item belongs to. - refresh (bool, optional): Whether to refresh the index after the deletion. Default is False. - - Raises: - NotFoundError: If the Item does not exist in the database. - """ - try: - await self.client.delete( - index=index_by_collection_id(collection_id), - id=mk_item_id(item_id, collection_id), - refresh=refresh, - ) - except exceptions.NotFoundError: - raise NotFoundError( - f"Item {item_id} in collection {collection_id} not found" - ) - - async def create_collection(self, collection: Collection, refresh: bool = False): - """Create a single collection in the database. - - Args: - collection (Collection): The Collection object to be created. - refresh (bool, optional): Whether to refresh the index after the creation. Default is False. - - Raises: - ConflictError: If a Collection with the same id already exists in the database. - - Notes: - A new index is created for the items in the Collection using the `create_item_index` function. - """ - collection_id = collection["id"] - - if await self.client.exists(index=COLLECTIONS_INDEX, id=collection_id): - raise ConflictError(f"Collection {collection_id} already exists") - - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - body=collection, - refresh=refresh, - ) - - await create_item_index(collection_id) - - async def find_collection(self, collection_id: str) -> Collection: - """Find and return a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be found. - - Returns: - Collection: The found collection, represented as a `Collection` object. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function searches for a collection in the database using the specified `collection_id` and returns the found - collection as a `Collection` object. If the collection is not found, a `NotFoundError` is raised. 
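        Example: the rename path in update_collection below moves items with a
        painless script that rewrites both the document id and the collection
        field. A Python simulation of that rewrite for one document, with
        hypothetical ids; note that replace() also touches the item-id half of
        the key if it happens to contain the old collection name:

            old, new = "landsat", "landsat-c2"
            doc_id = "scene-42|landsat"              # mk_item_id(...)
            source = {"id": "scene-42", "collection": "landsat"}
            doc_id = doc_id.replace(old, new)        # ctx._id.replace(old, new)
            source["collection"] = new               # ctx._source.collection = new
            assert (doc_id, source["collection"]) == ("scene-42|landsat-c2", "landsat-c2")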
- """ - try: - collection = await self.client.get( - index=COLLECTIONS_INDEX, id=collection_id - ) - except exceptions.NotFoundError: - raise NotFoundError(f"Collection {collection_id} not found") - - return collection["_source"] - - async def update_collection( - self, collection_id: str, collection: Collection, refresh: bool = False - ): - """Update a collection from the database. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be updated. - collection (Collection): The Collection object to be used for the update. - - Raises: - NotFoundError: If the collection with the given `collection_id` is not - found in the database. - - Notes: - This function updates the collection in the database using the specified - `collection_id` and with the collection specified in the `Collection` object. - If the collection is not found, a `NotFoundError` is raised. - """ - await self.find_collection(collection_id=collection_id) - - if collection_id != collection["id"]: - await self.create_collection(collection, refresh=refresh) - - await self.client.reindex( - body={ - "dest": {"index": f"{ITEMS_INDEX_PREFIX}{collection['id']}"}, - "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, - "script": { - "lang": "painless", - "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", - }, - }, - wait_for_completion=True, - refresh=refresh, - ) - - await self.delete_collection(collection_id) - - else: - await self.client.index( - index=COLLECTIONS_INDEX, - id=collection_id, - body=collection, - refresh=refresh, - ) - - async def delete_collection(self, collection_id: str, refresh: bool = False): - """Delete a collection from the database. - - Parameters: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to be deleted. - refresh (bool): Whether to refresh the index after the deletion (default: False). - - Raises: - NotFoundError: If the collection with the given `collection_id` is not found in the database. - - Notes: - This function first verifies that the collection with the specified `collection_id` exists in the database, and then - deletes the collection. If `refresh` is set to True, the index is refreshed after the deletion. Additionally, this - function also calls `delete_item_index` to delete the index for the items in the collection. - """ - await self.find_collection(collection_id=collection_id) - await self.client.delete( - index=COLLECTIONS_INDEX, id=collection_id, refresh=refresh - ) - await delete_item_index(collection_id) - - async def bulk_async( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database asynchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed asynchronously, and the event loop is used to run the operation in a separate executor. The - `mk_actions` function is called to generate a list of actions for the bulk insert. 
If `refresh` is set to True, the - index is refreshed after the bulk insert. The function does not return any value. - """ - await helpers.async_bulk( - self.client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - def bulk_sync( - self, collection_id: str, processed_items: List[Item], refresh: bool = False - ) -> None: - """Perform a bulk insert of items into the database synchronously. - - Args: - self: The instance of the object calling this function. - collection_id (str): The ID of the collection to which the items belong. - processed_items (List[Item]): A list of `Item` objects to be inserted into the database. - refresh (bool): Whether to refresh the index after the bulk insert (default: False). - - Notes: - This function performs a bulk insert of `processed_items` into the database using the specified `collection_id`. The - insert is performed synchronously and blocking, meaning that the function does not return until the insert has - completed. The `mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to - True, the index is refreshed after the bulk insert. The function does not return any value. - """ - helpers.bulk( - self.sync_client, - mk_actions(collection_id, processed_items), - refresh=refresh, - raise_on_error=False, - ) - - # DANGER - async def delete_items(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=ITEM_INDICES, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) - - # DANGER - async def delete_collections(self) -> None: - """Danger. this is only for tests.""" - await self.client.delete_by_query( - index=COLLECTIONS_INDEX, - body={"query": {"match_all": {}}}, - wait_for_completion=True, - ) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py deleted file mode 100644 index 6b648e2b..00000000 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py +++ /dev/null @@ -1,2 +0,0 @@ -"""library version.""" -__version__ = "2.0.0" From b06dc0712675987d8d23ca480a977f359ddfe454 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 2 Mar 2024 23:51:06 +0800 Subject: [PATCH 17/25] clean up docker-compose --- docker-compose.yml | 84 ---------------------------------------------- 1 file changed, 84 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 696ac303..d531a85b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,64 +1,6 @@ version: '3.9' services: - app-elasticsearch: - container_name: stac-fastapi-es - image: stac-utils/stac-fastapi-es - restart: always - build: - context: . - dockerfile: dockerfiles/Dockerfile.dev.es - environment: - - APP_HOST=0.0.0.0 - - APP_PORT=8080 - - RELOAD=true - - ENVIRONMENT=local - - WEB_CONCURRENCY=10 - - ES_HOST=elasticsearch - - ES_PORT=9200 - - ES_USE_SSL=false - - ES_VERIFY_CERTS=false - - BACKEND=elasticsearch - ports: - - "8080:8080" - volumes: - - ./stac_fastapi:/app/stac_fastapi - - ./scripts:/app/scripts - - ./esdata:/usr/share/elasticsearch/data - depends_on: - - elasticsearch - command: - bash -c "./scripts/wait-for-it-es.sh es-container:9200 && python -m stac_fastapi.elasticsearch.app" - - app-opensearch: - container_name: stac-fastapi-os - image: stac-utils/stac-fastapi-os - restart: always - build: - context: . 
- dockerfile: dockerfiles/Dockerfile.dev.os - environment: - - APP_HOST=0.0.0.0 - - APP_PORT=8082 - - RELOAD=true - - ENVIRONMENT=local - - WEB_CONCURRENCY=10 - - ES_HOST=opensearch - - ES_PORT=9202 - - ES_USE_SSL=false - - ES_VERIFY_CERTS=false - - BACKEND=opensearch - ports: - - "8082:8082" - volumes: - - ./stac_fastapi:/app/stac_fastapi - - ./scripts:/app/scripts - - ./osdata:/usr/share/opensearch/data - depends_on: - - opensearch - command: - bash -c "./scripts/wait-for-it-es.sh os-container:9202 && python -m stac_fastapi.opensearch.app" - app-mongo: container_name: stac-fastapi-mongo image: stac-utils/stac-fastapi-mongo @@ -106,29 +48,3 @@ services: - ME_CONFIG_MONGODB_ADMINUSERNAME=root - ME_CONFIG_MONGODB_ADMINPASSWORD=example - ME_CONFIG_MONGODB_URL=mongodb://root:example@mongo:27017/ - - elasticsearch: - container_name: es-container - image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.11.0} - hostname: elasticsearch - environment: - ES_JAVA_OPTS: -Xms512m -Xmx1g - volumes: - - ./elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml - - ./elasticsearch/snapshots:/usr/share/elasticsearch/snapshots - ports: - - "9200:9200" - - opensearch: - container_name: os-container - image: opensearchproject/opensearch:${OPENSEARCH_VERSION:-2.11.1} - hostname: opensearch - environment: - - discovery.type=single-node - - plugins.security.disabled=true - - OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m - volumes: - - ./opensearch/config/opensearch.yml:/usr/share/opensearch/config/opensearch.yml - - ./opensearch/snapshots:/usr/share/opensearch/snapshots - ports: - - "9202:9202" From 77661dbac49dedf2029fa6489273917842165615 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 2 Mar 2024 23:55:08 +0800 Subject: [PATCH 18/25] comment out workflow for now --- .github/workflows/cicd.yml | 215 ++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 108 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 915929cd..2a7968bc 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -1,5 +1,4 @@ -name: stac-fastapi-elasticsearch -on: +name: stac-fastapi-mongo push: branches: - main @@ -8,123 +7,123 @@ on: - main - features/** -jobs: - test: - runs-on: ubuntu-latest - timeout-minutes: 10 +# jobs: +# test: +# runs-on: ubuntu-latest +# timeout-minutes: 10 - services: +# services: - elasticsearch_8_svc: - image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0 - env: - cluster.name: stac-cluster - node.name: es01 - network.host: 0.0.0.0 - transport.host: 0.0.0.0 - discovery.type: single-node - http.port: 9200 - xpack.license.self_generated.type: basic - xpack.security.enabled: false - xpack.security.transport.ssl.enabled: false - ES_JAVA_OPTS: -Xms512m -Xmx1g - ports: - - 9200:9200 +# elasticsearch_8_svc: +# image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0 +# env: +# cluster.name: stac-cluster +# node.name: es01 +# network.host: 0.0.0.0 +# transport.host: 0.0.0.0 +# discovery.type: single-node +# http.port: 9200 +# xpack.license.self_generated.type: basic +# xpack.security.enabled: false +# xpack.security.transport.ssl.enabled: false +# ES_JAVA_OPTS: -Xms512m -Xmx1g +# ports: +# - 9200:9200 - elasticsearch_7_svc: - image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1 - env: - cluster.name: stac-cluster - node.name: es01 - network.host: 0.0.0.0 - transport.host: 0.0.0.0 - discovery.type: single-node - http.port: 9400 - xpack.license.self_generated.type: basic - 
xpack.security.enabled: false - xpack.security.transport.ssl.enabled: false - ES_JAVA_OPTS: -Xms512m -Xmx1g - ports: - - 9400:9400 +# elasticsearch_7_svc: +# image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1 +# env: +# cluster.name: stac-cluster +# node.name: es01 +# network.host: 0.0.0.0 +# transport.host: 0.0.0.0 +# discovery.type: single-node +# http.port: 9400 +# xpack.license.self_generated.type: basic +# xpack.security.enabled: false +# xpack.security.transport.ssl.enabled: false +# ES_JAVA_OPTS: -Xms512m -Xmx1g +# ports: +# - 9400:9400 - opensearch_2_11: - image: opensearchproject/opensearch:2.11.1 - env: - cluster.name: stac-cluster - node.name: os01 - network.host: 0.0.0.0 - transport.host: 0.0.0.0 - discovery.type: single-node - http.port: 9202 - http.cors.enabled: true - plugins.security.disabled: true - plugins.security.ssl.http.enabled: true - OPENSEARCH_JAVA_OPTS: -Xms512m -Xmx512m - ports: - - 9202:9202 - strategy: - matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11"] +# opensearch_2_11: +# image: opensearchproject/opensearch:2.11.1 +# env: +# cluster.name: stac-cluster +# node.name: os01 +# network.host: 0.0.0.0 +# transport.host: 0.0.0.0 +# discovery.type: single-node +# http.port: 9202 +# http.cors.enabled: true +# plugins.security.disabled: true +# plugins.security.ssl.http.enabled: true +# OPENSEARCH_JAVA_OPTS: -Xms512m -Xmx512m +# ports: +# - 9202:9202 +# strategy: +# matrix: +# python-version: [ "3.8", "3.9", "3.10", "3.11"] - name: Python ${{ matrix.python-version }} testing +# name: Python ${{ matrix.python-version }} testing - steps: - - name: Check out repository code - uses: actions/checkout@v4 +# steps: +# - name: Check out repository code +# uses: actions/checkout@v4 - # Setup Python (faster than using Python container) - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Lint code - uses: pre-commit/action@v3.0.1 +# # Setup Python (faster than using Python container) +# - name: Setup Python +# uses: actions/setup-python@v5 +# with: +# python-version: ${{ matrix.python-version }} +# - name: Lint code +# uses: pre-commit/action@v3.0.1 - - name: Install pipenv - run: | - python -m pip install --upgrade pipenv wheel +# - name: Install pipenv +# run: | +# python -m pip install --upgrade pipenv wheel - - name: Install elasticsearch stac-fastapi - run: | - pip install ./stac_fastapi/elasticsearch[dev,server] +# - name: Install elasticsearch stac-fastapi +# run: | +# pip install ./stac_fastapi/elasticsearch[dev,server] - - name: Install opensearch stac-fastapi - run: | - pip install ./stac_fastapi/opensearch[dev,server] +# - name: Install opensearch stac-fastapi +# run: | +# pip install ./stac_fastapi/opensearch[dev,server] - - name: Install core library stac-fastapi - run: | - pip install ./stac_fastapi/core +# - name: Install core library stac-fastapi +# run: | +# pip install ./stac_fastapi/core - - name: Run test suite against Elasticsearch 7.x - run: | - pipenv run pytest -svvv - env: - ENVIRONMENT: testing - ES_PORT: 9200 - ES_HOST: 172.17.0.1 - ES_USE_SSL: false - ES_VERIFY_CERTS: false - BACKEND: elasticsearch +# - name: Run test suite against Elasticsearch 7.x +# run: | +# pipenv run pytest -svvv +# env: +# ENVIRONMENT: testing +# ES_PORT: 9200 +# ES_HOST: 172.17.0.1 +# ES_USE_SSL: false +# ES_VERIFY_CERTS: false +# BACKEND: elasticsearch - - name: Run test suite against Elasticsearch 8.x - run: | - pipenv run pytest -svvv - env: - ENVIRONMENT: testing - ES_PORT: 9400 - ES_HOST: 
172.17.0.1 - ES_USE_SSL: false - ES_VERIFY_CERTS: false - BACKEND: elasticsearch +# - name: Run test suite against Elasticsearch 8.x +# run: | +# pipenv run pytest -svvv +# env: +# ENVIRONMENT: testing +# ES_PORT: 9400 +# ES_HOST: 172.17.0.1 +# ES_USE_SSL: false +# ES_VERIFY_CERTS: false +# BACKEND: elasticsearch - - name: Run test suite against OpenSearch 2.11.1 - run: | - pipenv run pytest -svvv - env: - ENVIRONMENT: testing - ES_PORT: 9202 - ES_HOST: 172.17.0.1 - ES_USE_SSL: false - ES_VERIFY_CERTS: false - BACKEND: opensearch \ No newline at end of file +# - name: Run test suite against OpenSearch 2.11.1 +# run: | +# pipenv run pytest -svvv +# env: +# ENVIRONMENT: testing +# ES_PORT: 9202 +# ES_HOST: 172.17.0.1 +# ES_USE_SSL: false +# ES_VERIFY_CERTS: false +# BACKEND: opensearch \ No newline at end of file From 95e3b84387785ecf8f44098a697b5d3aa9a3aea7 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 00:07:58 +0800 Subject: [PATCH 19/25] update readme 1 --- README.md | 159 +++++------------------------------------------------- 1 file changed, 12 insertions(+), 147 deletions(-) diff --git a/README.md b/README.md index 08782637..f115d995 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,14 @@ -# stac-fastapi-elasticsearch-opensearch (sfeos) +# stac-fastapi-mongo -## Elasticsearch, Opensearch and Mongo backends for the stac-fastapi project +## Mongo backend for the stac-fastapi project built on top of the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library. + +### Note: This is presently in development and for now is mostly a proof of concept showing that other databases can be plugged into the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library, besides for elasticsearch and opensearch. -[![PyPI version](https://badge.fury.io/py/stac-fastapi.elasticsearch.svg)](https://badge.fury.io/py/stac-fastapi.elasticsearch) To install from PyPI: ```shell -pip install stac_fastapi.elasticsearch -``` -or -``` -pip install stac_fastapi.opensearch +pip install stac_fastapi.mongo ``` #### For changes, see the [Changelog](CHANGELOG.md) @@ -22,13 +19,7 @@ pip install stac_fastapi.opensearch To install the classes in your local Python env, run: ```shell -pip install -e 'stac_fastapi/elasticsearch[dev]' -``` - -or - -```shell -pip install -e 'stac_fastapi/opensearch[dev]' +pip install -e 'stac_fastapi/mongo[dev]' ``` @@ -45,30 +36,20 @@ pre-commit run --all-files ## Build Elasticsearh API backend ```shell -docker-compose up elasticsearch -docker-compose build app-elasticsearch +docker-compose up mongo +docker-compose build app-mongo ``` -## Running Elasticsearh API on localhost:8080 - -```shell -docker-compose up app-elasticsearch -``` - -By default, docker-compose uses Elasticsearch 8.x and OpenSearch 2.11.1. -If you wish to use a different version, put the following in a -file named `.env` in the same directory you run docker-compose from: +## Running Mongo API on localhost:8084 ```shell -ELASTICSEARCH_VERSION=7.17.1 -OPENSEARCH_VERSION=2.11.0 +docker-compose up app-mongo ``` -The most recent Elasticsearch 7.x versions should also work. See the [opensearch-py docs](https://github.com/opensearch-project/opensearch-py/blob/main/COMPATIBILITY.md) for compatibility information. 
To create a new Collection: ```shell -curl -X "POST" "http://localhost:8080/collections" \ +curl -X "POST" "http://localhost:8084/collections" \ -H 'Content-Type: application/json; charset=utf-8' \ -d $'{ "id": "my_collection" @@ -85,7 +66,7 @@ returned from the `/collections` route contains a `next` link with the token tha get the next page of results. ```shell -curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" +curl -X "GET" "http://localhost:8084/collections?limit=1&token=example_token" ``` ## Testing @@ -93,126 +74,10 @@ curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" ```shell make test ``` -Test against OpenSearch only - -```shell -make test-opensearch -``` -Test against Elasticsearch only - -```shell -make test-elasticsearch -``` ## Ingest sample data ```shell make ingest ``` - -## Elasticsearch Mappings - -Mappings apply to search index, not source. - - -## Managing Elasticsearch Indices - -This section covers how to create a snapshot repository and then create and restore snapshots with this. - -Create a snapshot repository. This puts the files in the `elasticsearch/snapshots` in this git repo clone, as -the elasticsearch.yml and docker-compose files create a mapping from that directory to -`/usr/share/elasticsearch/snapshots` within the Elasticsearch container and grant permissions on using it. - -```shell -curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "type": "fs", - "settings": { - "location": "/usr/share/elasticsearch/snapshots/my_fs_backup" - } -}' -``` - -The next step is to create a snapshot of one or more indices into this snapshot repository. This command creates -a snapshot named `my_snapshot_2` and waits for the action to be completed before returning. This can also be done -asynchronously, and queried for status. The `indices` parameter determines which indices are snapshotted, and -can include wildcards. - -```shell -curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2?wait_for_completion=true" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "metadata": { - "taken_because": "dump of all items", - "taken_by": "pvarner" - }, - "include_global_state": false, - "ignore_unavailable": false, - "indices": "items_my-collection" -}' -``` - -To see the status of this snapshot: - -```shell -curl http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2 -``` - -To see all the snapshots: - -```shell -curl http://localhost:9200/_snapshot/my_fs_backup/_all -``` - -To restore a snapshot, run something similar to the following. This specific command will restore any indices that -match `items_*` and rename them so that the new index name will be suffixed with `-copy`. - -```shell -curl -X "POST" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2/_restore?wait_for_completion=true" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "include_aliases": false, - "include_global_state": false, - "ignore_unavailable": true, - "rename_replacement": "items_$1-copy", - "indices": "items_*", - "rename_pattern": "items_(.+)" -}' -``` - -Now the item documents have been restored in to the new index (e.g., `my-collection-copy`), but the value of the -`collection` field in those documents is still the original value of `my-collection`. 
To update these to match the -new collection name, run the following Elasticsearch Update By Query command, substituting the old collection name -into the term filter and the new collection name into the script parameter: - -```shell -curl -X "POST" "http://localhost:9200/items_my-collection-copy/_update_by_query" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "query": { - "match_all": {} -}, - "script": { - "lang": "painless", - "params": { - "collection": "my-collection-copy" - }, - "source": "ctx._source.collection = params.collection" - } -}' -``` - -Then, create a new collection through the api with the new name for each of the restored indices: - -```shell -curl -X "POST" "http://localhost:8080/collections" \ - -H 'Content-Type: application/json' \ - -d $'{ - "id": "my-collection-copy" -}' -``` - -Voila! You have a copy of the collection now that has a resource URI (`/collections/my-collection-copy`) and can be -correctly queried by collection name. From 504a42550da8ae81eb3a81a8a86ff6572a880f41 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 00:35:24 +0800 Subject: [PATCH 20/25] update data loader --- data_loader/data_loader.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/data_loader/data_loader.py b/data_loader/data_loader.py index c438811d..38ca5666 100644 --- a/data_loader/data_loader.py +++ b/data_loader/data_loader.py @@ -6,19 +6,13 @@ import click import requests -if len(sys.argv) != 2: - print("Usage: python data_loader.py ") +if len(sys.argv) != 1: + print("Usage: python data_loader.py") sys.exit(1) DATA_DIR = os.path.join(os.path.dirname(__file__), "setup_data/") -backend = sys.argv[1].lower() -if backend == "opensearch": - STAC_API_BASE_URL = "http://localhost:8082" -elif backend == "elasticsearch": - STAC_API_BASE_URL = "http://localhost:8080" -else: - print("Invalid backend tag. Enter either 'opensearch' or 'elasticsearch'.") +STAC_API_BASE_URL = "http://localhost:8084" def load_data(filename): From be48b1639d3345aaca03ffb5851a12b542f1e975 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 01:16:23 +0800 Subject: [PATCH 21/25] remove installing core locally --- dockerfiles/Dockerfile.deploy.mongo | 1 - dockerfiles/Dockerfile.dev.mongo | 1 - 2 files changed, 2 deletions(-) diff --git a/dockerfiles/Dockerfile.deploy.mongo b/dockerfiles/Dockerfile.deploy.mongo index 8215d0a7..d0d95df2 100644 --- a/dockerfiles/Dockerfile.deploy.mongo +++ b/dockerfiles/Dockerfile.deploy.mongo @@ -12,7 +12,6 @@ WORKDIR /app COPY . /app -RUN pip install --no-cache-dir -e ./stac_fastapi/core RUN pip install --no-cache-dir ./stac_fastapi/mongo[server] EXPOSE 8080 diff --git a/dockerfiles/Dockerfile.dev.mongo b/dockerfiles/Dockerfile.dev.mongo index df8e8494..4f43abac 100644 --- a/dockerfiles/Dockerfile.dev.mongo +++ b/dockerfiles/Dockerfile.dev.mongo @@ -15,5 +15,4 @@ WORKDIR /app COPY . /app -RUN pip install --no-cache-dir -e ./stac_fastapi/core RUN pip install --no-cache-dir -e ./stac_fastapi/mongo[dev,server] From 4d6b0aed7efe7a4a9aab38f781075ebfa305ce64 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 18:47:56 +0800 Subject: [PATCH 22/25] update changelog, license --- CHANGELOG.md | 115 ++------------------------------------------------- LICENSE | 2 +- 2 files changed, 4 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67882e2d..d6880575 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,133 +5,24 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [Unreleased] - -### Added - -- Added explicit mapping for ID in `ES_COLLECTIONS_MAPPINGS` [#198](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/198) - -### Changed - -- Removed database logic from core.py all_collections [#196](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/196) -- Changed OpenSearch config ssl_version to SSLv23 [#200](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/200) - -### Fixed - -## [v2.0.0] - -### Added - -- Added core library package for common logic [#186](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/186) - -### Changed - -- Moved Elasticsearch and Opensearch backends into separate packages [#186](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/186) - -### Fixed - -- Allow additional top-level properties on collections [#191](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/191) - -## [v1.1.0] - -### Added - -- Advanced comparison (LIKE, IN, BETWEEN) operators to the Filter extension [#178](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/178) -- Collection update endpoint no longer delete all sub items [#177](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/177) -- OpenSearch 2.11.1 support [#188](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/188) - -### Changed -- Elasticsearch drivers from 7.17.9 to 8.11.0 [#169](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/169) -- Collection update endpoint no longer delete all sub items [#177](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/177) - -### Fixed - -- Exclude unset fields in search response [#166](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/166) -- Upgrade stac-fastapi to v2.4.9 [#172](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/172) -- Set correct default filter-lang for GET /search requests [#179](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/179) - -## [v1.0.0] +## [Unreleased] ### Added -- Collection-level Assets to the CollectionSerializer [#148](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/148) -- Pagination for /collections - GET all collections - route [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164) -- Examples folder with example docker setup for running sfes from pip [#147](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/147) -- GET /search filter extension queries [#163](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/163) -- Added support for GET /search intersection queries [#158](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/158) - ### Changed -- Update elasticsearch version from 8.1.3 to 8.10.4 in cicd, gh actions [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164) -- Updated core stac-fastapi libraries to 2.4.8 from 2.4.3 [#151](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/151) -- Use aliases on Elasticsearch indices, add number suffix in index name. 
[#152](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/152) - ### Fixed -- Corrected the closing of client connections in ES index management functions [#132](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/132) -- Corrected the automatic converstion of float values to int when building Filter Clauses [#135](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/135) -- Do not index `proj:geometry` field as geo_shape [#154](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/154) -- Remove unsupported characters from Elasticsearch index names [#153](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/153) -- Fixed GET /search sortby requests [#25](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/25) - -## [v0.3.0] +## [v2.0.0] ### Added -- Added bbox and datetime parameters and functionality to item_collection [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) -- Added collection_id parameter to create_item function [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) -- Added item_id and collection_id to update_item [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) -- The default Collection objects index can be overridden by the `STAC_COLLECTIONS_INDEX` environment variable [#128](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/128) -- The default Item objects index prefix can be overridden by the `STAC_ITEMS_INDEX_PREFIX` environment variable [#128](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/128) -- Fields Extension [#129](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/129) -- Support for Python 3.11 [#131](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/131) - ### Changed -- Updated core stac-fastapi libraries to 2.4.3 from 2.3.0 [#127](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/127) - - -## [v0.2.0] - -### Added - -- Filter Extension as GET with CQL2-Text and POST with CQL2-JSON, - supporting the Basic CQL2 and Basic Spatial Operators conformance classes. -- Added Elasticsearch local config to support snapshot/restore to local filesystem - ### Fixed -- Fixed search intersects query. -- Corrected the Sort and Query conformance class URIs. - -### Changed - -- Default to Python 3.10 -- Default to Elasticsearch 8.x -- Collection objects are now stored in `collections` index rather than `stac_collections` index -- Item objects are no longer stored in `stac_items`, but in indices per collection named `items_{collection_id}` -- When using bulk ingest, items will continue to be ingested if any of them fail. Previously, the call would fail - immediately if any items failed. - - -## [v0.1.0] - -### Changed - -- Elasticsearch index mappings updated to be more thorough. -- Endpoints that return items (e.g., /search) now sort the results by 'properties.datetime,id,collection'. - Previously, there was no sort order defined. -- Db_to_stac serializer moved to core.py for consistency as it existed in both core and database_logic previously. -- Use genexp in execute_search and get_all_collections to return results. -- Added db_to_stac serializer to item_collection method in core.py. 
- [Unreleased]: -[v2.0.0]: -[v1.1.0]: -[v1.0.0]: -[v0.3.0]: -[v0.2.0]: -[v0.1.0]: \ No newline at end of file +[v2.0.0]: diff --git a/LICENSE b/LICENSE index 998f791a..5a60713e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Jonathan Healy +Copyright (c) 2024 Jonathan Healy Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 152ecf0d522f22aa0a5181afa71389edf52d3ba4 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 18:48:16 +0800 Subject: [PATCH 23/25] fix client closing --- stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py index 71a59c39..a6242784 100644 --- a/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/stac_fastapi/mongo/database_logic.py @@ -79,7 +79,7 @@ async def create_item_index(): f"An error occurred while creating indexes for collection {ITEMS_INDEX}: {e}" ) finally: - await client.close() + client.close() def mk_item_id(item_id: str, collection_id: str): From d0565e161fab6ffed6e5fd911c9951e0eb4ba847 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 19:06:18 +0800 Subject: [PATCH 24/25] update makefile --- Makefile | 100 ++++++------------ .../elasticsearch/config/elasticsearch.yml | 18 ---- scripts/publish | 73 ------------- 3 files changed, 31 insertions(+), 160 deletions(-) delete mode 100644 examples/pip_docker/elasticsearch/config/elasticsearch.yml delete mode 100755 scripts/publish diff --git a/Makefile b/Makefile index 4dfb2aec..7467eb8e 100644 --- a/Makefile +++ b/Makefile @@ -2,92 +2,58 @@ APP_HOST ?= 0.0.0.0 EXTERNAL_APP_PORT ?= ${APP_PORT} -ES_APP_PORT ?= 8080 -ES_HOST ?= docker.for.mac.localhost -ES_PORT ?= 9200 +MONGO_APP_PORT ?= 8084 +MONGO_HOST ?= docker.for.mac.localhost +MONGO_PORT ?= 27017 -OS_APP_PORT ?= 8082 -OS_HOST ?= docker.for.mac.localhost -OS_PORT ?= 9202 - -run_es = docker-compose \ +run_mongo = docker-compose \ run \ - -p ${EXTERNAL_APP_PORT}:${ES_APP_PORT} \ + -p ${EXTERNAL_APP_PORT}:${MONGO_APP_PORT} \ -e PY_IGNORE_IMPORTMISMATCH=1 \ -e APP_HOST=${APP_HOST} \ - -e APP_PORT=${ES_APP_PORT} \ - app-elasticsearch + -e APP_PORT=${MONGO_APP_PORT} \ + app-mongo -run_os = docker-compose \ - run \ - -p ${EXTERNAL_APP_PORT}:${OS_APP_PORT} \ - -e PY_IGNORE_IMPORTMISMATCH=1 \ - -e APP_HOST=${APP_HOST} \ - -e APP_PORT=${OS_APP_PORT} \ - app-opensearch +.PHONY: image-deploy-mongo +image-deploy-mongo: + docker build -f dockerfiles/Dockerfile.dev.mongo -t stac-fastapi-mongo:latest . -.PHONY: image-deploy-es -image-deploy-es: - docker build -f dockerfiles/Dockerfile.dev.es -t stac-fastapi-elasticsearch:latest . - -.PHONY: image-deploy-os -image-deploy-os: - docker build -f dockerfiles/Dockerfile.dev.os -t stac-fastapi-opensearch:latest . 
.PHONY: run-deploy-locally
 run-deploy-locally:
-	docker run -it -p 8080:8080 \
-		-e ES_HOST=${ES_HOST} \
-		-e ES_PORT=${ES_PORT} \
-		-e ES_USER=${ES_USER} \
-		-e ES_PASS=${ES_PASS} \
-		stac-fastapi-elasticsearch:latest
+	docker run -it -p 8084:8084 \
+		-e ES_HOST=${MONGO_HOST} \
+		-e ES_PORT=${MONGO_PORT} \
+		-e ES_USER=${MONGO_USER} \
+		-e ES_PASS=${MONGO_PASS} \
+		stac-fastapi-mongo:latest
 
 .PHONY: image-dev
 image-dev:
 	docker-compose build
 
-.PHONY: docker-run-es
-docker-run-es: image-dev
-	$(run_es)
-
-.PHONY: docker-run-os
-docker-run-os: image-dev
-	$(run_os)
+.PHONY: docker-run-mongo
+docker-run-mongo: image-dev
+	$(run_mongo)
 
-.PHONY: docker-shell-es
-docker-shell-es:
-	$(run_es) /bin/bash
+.PHONY: docker-shell-mongo
+docker-shell-mongo:
+	$(run_mongo) /bin/bash
 
-.PHONY: docker-shell-os
-docker-shell-os:
-	$(run_os) /bin/bash
 
-.PHONY: test-elasticsearch
-test-elasticsearch:
-	-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest'
-	docker-compose down
-
-.PHONY: test-opensearch
-test-opensearch:
-	-$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest'
+.PHONY: test-mongo
+test-mongo:
+	-$(run_mongo) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh mongo:27017 && cd stac_fastapi/tests/ && pytest'
 	docker-compose down
 
 .PHONY: test
 test:
-	-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest'
-	docker-compose down
-
-	-$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest'
+	-$(run_mongo) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh mongo:27017 && cd stac_fastapi/tests/ && pytest'
 	docker-compose down
 
-.PHONY: run-database-es
-run-database-es:
-	docker-compose run --rm elasticsearch
-
-.PHONY: run-database-os
-run-database-os:
-	docker-compose run --rm opensearch
+.PHONY: run-database-mongo
+run-database-mongo:
+	docker-compose run --rm mongo
 
 .PHONY: pybase-install
 pybase-install:
@@ -97,13 +63,9 @@ pybase-install:
 	pip install -e ./stac_fastapi/extensions[dev] && \
 	pip install -e ./stac_fastapi/core
 
-.PHONY: install-es
-install-es: pybase-install
-	pip install -e ./stac_fastapi/elasticsearch[dev,server]
-
-.PHONY: install-os
-install-os: pybase-install
-	pip install -e ./stac_fastapi/opensearch[dev,server]
+.PHONY: install-mongo
+install-mongo: pybase-install
+	pip install -e ./stac_fastapi/mongo[dev,server]
 
 .PHONY: ingest
 ingest:
diff --git a/examples/pip_docker/elasticsearch/config/elasticsearch.yml b/examples/pip_docker/elasticsearch/config/elasticsearch.yml
deleted file mode 100644
index 0bf5b680..00000000
--- a/examples/pip_docker/elasticsearch/config/elasticsearch.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-## Cluster Settings
-cluster.name: stac-cluster
-node.name: es01
-network.host: 0.0.0.0
-transport.host: 0.0.0.0
-discovery.type: single-node
-http.port: 9200
-
-path:
-  repo:
-    - /usr/share/elasticsearch/snapshots
-
-## License
-xpack.license.self_generated.type: basic
-
-# Security
-xpack.security.enabled: false
-xpack.security.transport.ssl.enabled: false
\ No newline at end of file
diff --git a/scripts/publish b/scripts/publish
deleted file mode 100755
index 464bbe05..00000000
--- a/scripts/publish
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [[ -n "${CI}" ]]; then
-    set -x
-fi
-
-# Import shared variables
-SUBPACKAGE_DIRS=(
-    "stac_fastapi/types"
-    "stac_fastapi/extensions"
-    "stac_fastapi/api"
-    "stac_fastapi/sqlalchemy"
-    "stac_fastapi/pgstac"
"stac_fastapi/elasticsearch" -) - -function usage() { - echo -n \ - "Usage: $(basename "$0") -Publish all stac-fastapi packages. - -Options: ---test Publish to test pypi. Requires a 'testpypi' repository - be defined in your .pypirc; - See https://packaging.python.org/guides/using-testpypi/#using-testpypi-with-pip -" -} - -POSITIONAL=() -while [[ $# -gt 0 ]] -do - key="$1" - case $key in - - --help) - usage - exit 0 - shift - ;; - - --test) - TEST_PYPI="--repository testpypi" - shift - ;; - - *) # unknown option - POSITIONAL+=("$1") # save it in an array for later - shift # past argument - ;; - esac -done -set -- "${POSITIONAL[@]}" # restore positional parameters - -# Fail if this isn't CI and we aren't publishing to test pypi -if [ -z "${TEST_PYPI}" ] && [ -z "${CI}" ]; then - echo "Only CI can publish to pypi" - exit 1 -fi - -if [ "${BASH_SOURCE[0]}" = "${0}" ]; then - for PACKAGE_DIR in "${SUBPACKAGE_DIRS[@]}" - do - echo ${PACKAGE_DIR} - pushd ./${PACKAGE_DIR} - rm -rf dist - python setup.py sdist bdist_wheel - twine upload ${TEST_PYPI} dist/* - popd - - done -fi \ No newline at end of file From 1da8e7698e4057da2f6db2d3a322b036c45a8bae Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sun, 3 Mar 2024 19:08:33 +0800 Subject: [PATCH 25/25] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f115d995..8815d393 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## Mongo backend for the stac-fastapi project built on top of the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library. -### Note: This is presently in development and for now is mostly a proof of concept showing that other databases can be plugged into the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library, besides for elasticsearch and opensearch. +- Note: This is presently in development and, for now, is a proof of concept project showing that other databases can be plugged into the [sfeos](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) core api library, besides for elasticsearch and opensearch. To install from PyPI: