diff --git a/CHANGELOG.md b/CHANGELOG.md
index cc3aa19f..4d13b778 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Changed
 
+- Improved datetime query handling to only check start and end datetime values when datetime is None [#396](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/396)
 - Optimize data_loader.py script [#395](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/395)
 
 ### Removed
diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
index a1ca6250..2d630769 100644
--- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
+++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py
@@ -245,121 +245,97 @@ def apply_collections_filter(search: Search, collection_ids: List[str]):
     @staticmethod
     def apply_datetime_filter(
         search: Search, interval: Optional[Union[DateTimeType, str]]
-    ):
+    ) -> Search:
         """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search (Search): The search object to filter.
-            interval: Optional[Union[DateTimeType, str]]
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-            Search: The filtered search object.
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
-        datetime_search = return_date(interval)
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "term",
-                                properties__datetime=datetime_search["eq"],
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["eq"],
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["eq"],
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
index 88c7fcdc..ba6fb9e8 100644
--- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
+++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py
@@ -282,121 +282,97 @@ def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]
     @staticmethod
     def apply_datetime_filter(
         search: Search, interval: Optional[Union[DateTimeType, str]]
-    ):
-        """Apply a filter to search based on datetime field, start_datetime, and end_datetime fields.
+    ) -> Search:
+        """Apply a filter to search on datetime, start_datetime, and end_datetime fields.
 
         Args:
-            search (Search): The search object to filter.
-            interval: Optional[Union[DateTimeType, str]]
+            search: The search object to filter.
+            interval: Optional datetime interval to filter by. Can be:
+                - A single datetime string (e.g., "2023-01-01T12:00:00")
+                - A datetime range string (e.g., "2023-01-01/2023-12-31")
+                - A datetime object
+                - A tuple of (start_datetime, end_datetime)
 
         Returns:
-            Search: The filtered search object.
+            The filtered search object.
         """
+        if not interval:
+            return search
+
         should = []
-        datetime_search = return_date(interval)
+        try:
+            datetime_search = return_date(interval)
+        except (ValueError, TypeError) as e:
+            # Handle invalid interval formats if return_date fails
+            logger.error(f"Invalid interval format: {interval}, error: {e}")
+            return search
 
-        # If the request is a single datetime return
-        # items with datetimes equal to the requested datetime OR
-        # the requested datetime is between their start and end datetimes
         if "eq" in datetime_search:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "term",
-                                properties__datetime=datetime_search["eq"],
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["eq"],
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["eq"],
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        # If the request is a date range return
-        # items with datetimes within the requested date range OR
-        # their startdatetime ithin the requested date range OR
-        # their enddatetime ithin the requested date range OR
-        # the requested daterange within their start and end datetimes
+            # For exact matches, include:
+            # 1. Items with matching exact datetime
+            # 2. Items with datetime:null where the time falls within their range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q("term", **{"properties__datetime": datetime_search["eq"]}),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["eq"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["eq"]},
+                        ),
+                    ],
+                ),
+            ]
         else:
-            should.extend(
-                [
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["gte"],
-                                    "lte": datetime_search["lte"],
-                                },
-                            ),
-                        ],
-                    ),
-                    Q(
-                        "bool",
-                        filter=[
-                            Q(
-                                "range",
-                                properties__start_datetime={
-                                    "lte": datetime_search["gte"]
-                                },
-                            ),
-                            Q(
-                                "range",
-                                properties__end_datetime={
-                                    "gte": datetime_search["lte"]
-                                },
-                            ),
-                        ],
-                    ),
-                ]
-            )
-
-        search = search.query(Q("bool", filter=[Q("bool", should=should)]))
-
-        return search
+            # For date ranges, include:
+            # 1. Items with datetime in the range
+            # 2. Items with datetime:null that overlap the search range
+            should = [
+                Q(
+                    "bool",
+                    filter=[
+                        Q("exists", field="properties.datetime"),
+                        Q(
+                            "range",
+                            properties__datetime={
+                                "gte": datetime_search["gte"],
+                                "lte": datetime_search["lte"],
+                            },
+                        ),
+                    ],
+                ),
+                Q(
+                    "bool",
+                    must_not=[Q("exists", field="properties.datetime")],
+                    filter=[
+                        Q("exists", field="properties.start_datetime"),
+                        Q("exists", field="properties.end_datetime"),
+                        Q(
+                            "range",
+                            properties__start_datetime={"lte": datetime_search["lte"]},
+                        ),
+                        Q(
+                            "range",
+                            properties__end_datetime={"gte": datetime_search["gte"]},
+                        ),
+                    ],
+                ),
+            ]
+
+        return search.query(Q("bool", should=should, minimum_should_match=1))
 
     @staticmethod
     def apply_bbox_filter(search: Search, bbox: List):
diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py
index 6f344b19..fa701a24 100644
--- a/stac_fastapi/tests/resources/test_item.py
+++ b/stac_fastapi/tests/resources/test_item.py
@@ -1,9 +1,11 @@
 import json
+import logging
 import os
 import uuid
 from copy import deepcopy
 from datetime import datetime, timedelta
 from random import randint
+from typing import Dict
 from urllib.parse import parse_qs, urlparse, urlsplit
 
 import ciso8601
@@ -15,7 +17,9 @@
 from stac_fastapi.core.datetime_utils import datetime_to_str, now_to_rfc3339_str
 from stac_fastapi.types.core import LandingPageMixin
 
-from ..conftest import create_item, refresh_indices
+from ..conftest import create_collection, create_item, refresh_indices
+
+logger = logging.getLogger(__name__)
 
 if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch":
     from stac_fastapi.opensearch.database_logic import DatabaseLogic
@@ -398,8 +402,8 @@
 async def test_item_search_temporal_intersecting_window_post(app_client, ctx):
     test_item = ctx.item
     item_date = rfc3339_str_to_datetime(test_item["properties"]["datetime"])
-    item_date_before = item_date - timedelta(days=10)
-    item_date_after = item_date - timedelta(days=2)
+    item_date_before = item_date - timedelta(days=2)  # Changed from 10 to 2
+    item_date_after = item_date + timedelta(days=2)  # Changed from -2 to +2
 
     params = {
         "collections": [test_item["collection"]],
@@ -940,36 +944,183 @@ async def test_search_datetime_validation_errors(app_client):
         assert resp.status_code == 400
 
 
-# this test should probably pass but doesn't - stac-pydantic
-# https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/issues/247
-
-# @pytest.mark.asyncio
-# async def test_item_custom_links(app_client, ctx, txn_client):
-#     item = ctx.item
-#     item_id = "test-item-custom-links"
-#     item["id"] = item_id
-#     item["links"].append(
-#         {
-#             "href": "https://maps.example.com/wms",
-#             "rel": "wms",
-#             "type": "image/png",
-#             "title": "RGB composite visualized through a WMS",
-#             "wms:layers": ["rgb"],
-#             "wms:transparent": True,
-#         }
-#     )
-#     await create_item(txn_client, item)
-
-#     resp = await app_client.get("/search", params={"id": item_id})
-#     assert resp.status_code == 200
-#     resp_json = resp.json()
-#     links = resp_json["features"][0]["links"]
-#     for link in links:
-#         if link["rel"] == "wms":
-#             assert link["href"] == "https://maps.example.com/wms"
-#             assert link["type"] == "image/png"
-#             assert link["title"] == "RGB composite visualized through a WMS"
-#             assert link["wms:layers"] == ["rgb"]
-#             assert link["wms:transparent"]
-#             return True
-#     assert False, resp_json
+@pytest.mark.asyncio
+async def test_item_custom_links(app_client, ctx, txn_client):
+    item = ctx.item
+    item_id = "test-item-custom-links"
+    item["id"] = item_id
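+    # Add a custom WMS link with vendor-specific fields to verify they survive a search round-trip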
+    item["links"].append(
+        {
+            "href": "https://maps.example.com/wms",
+            "rel": "wms",
+            "type": "image/png",
+            "title": "RGB composite visualized through a WMS",
+            "wms:layers": ["rgb"],
+            "wms:transparent": True,
+        }
+    )
+    await create_item(txn_client, item)
+
+    resp = await app_client.get("/search", params={"id": item_id})
+    assert resp.status_code == 200
+    resp_json = resp.json()
+    links = resp_json["features"][0]["links"]
+    for link in links:
+        if link["rel"] == "wms":
+            assert link["href"] == "https://maps.example.com/wms"
+            assert link["type"] == "image/png"
+            assert link["title"] == "RGB composite visualized through a WMS"
+            assert link["wms:layers"] == ["rgb"]
+            assert link["wms:transparent"]
+            return True
+    assert False, resp_json
+
+
+async def _search_and_get_ids(
+    app_client,
+    endpoint: str = "/search",
+    method: str = "get",
+    params: Dict = None,
+    json: Dict = None,
+) -> set:
+    """Helper to send search request and extract feature IDs."""
+    if method == "get":
+        resp = await app_client.get(endpoint, params=params)
+    else:
+        resp = await app_client.post(endpoint, json=json)
+
+    assert resp.status_code == 200, f"Search failed: {resp.text}"
+    data = resp.json()
+    return {f["id"] for f in data.get("features", [])}
+
+
+@pytest.mark.asyncio
+async def test_search_datetime_with_null_datetime(
+    app_client, txn_client, load_test_data
+):
+    """Test datetime filtering when properties.datetime is set or null, verifying that start_datetime and end_datetime are used when datetime is null."""
+    # Setup: Create test collection
+    test_collection = load_test_data("test_collection.json")
+    try:
+        await create_collection(txn_client, collection=test_collection)
+    except Exception as e:
+        logger.error(f"Failed to create collection: {e}")
+        pytest.fail(f"Collection creation failed: {e}")
+
+    base_item = load_test_data("test_item.json")
+    collection_id = base_item["collection"]
+
+    # Item 1: Null datetime, valid start/end datetimes
+    null_dt_item = deepcopy(base_item)
+    null_dt_item["id"] = "null-datetime-item"
+    null_dt_item["properties"]["datetime"] = None
+    null_dt_item["properties"]["start_datetime"] = "2020-01-01T00:00:00Z"
+    null_dt_item["properties"]["end_datetime"] = "2020-01-02T00:00:00Z"
+
+    # Item 2: Valid datetime, no start/end datetimes
+    valid_dt_item = deepcopy(base_item)
+    valid_dt_item["id"] = "valid-datetime-item"
+    valid_dt_item["properties"]["datetime"] = "2020-01-01T11:00:00Z"
+    valid_dt_item["properties"]["start_datetime"] = None
+    valid_dt_item["properties"]["end_datetime"] = None
+
+    # Item 3: Valid datetime outside range, valid start/end datetimes
+    range_item = deepcopy(base_item)
+    range_item["id"] = "range-item"
+    range_item["properties"]["datetime"] = "2020-01-03T00:00:00Z"
+    range_item["properties"]["start_datetime"] = "2020-01-01T00:00:00Z"
+    range_item["properties"]["end_datetime"] = "2020-01-02T00:00:00Z"
+
+    # Create valid items
+    items = [null_dt_item, valid_dt_item, range_item]
+    for item in items:
+        try:
+            await create_item(txn_client, item)
+        except Exception as e:
+            logger.error(f"Failed to create item {item['id']}: {e}")
+            pytest.fail(f"Item creation failed: {e}")
+
+    # Refresh indices once
+    try:
+        await refresh_indices(txn_client)
+    except Exception as e:
+        logger.error(f"Failed to refresh indices: {e}")
+        pytest.fail(f"Index refresh failed: {e}")
+
+    # Test 1: Exact datetime matching valid-datetime-item and null-datetime-item
+    feature_ids = await _search_and_get_ids(
+        app_client,
+        params={
+            "datetime": "2020-01-01T11:00:00Z",
+            "collections": [collection_id],
+        },
+    )
+    assert feature_ids == {
+        "valid-datetime-item",  # Matches properties__datetime
+        "null-datetime-item",  # Matches start_datetime <= datetime <= end_datetime
+    }, "Exact datetime search failed"
+
+    # Test 2: Range including valid-datetime-item, null-datetime-item, and range-item
+    feature_ids = await _search_and_get_ids(
+        app_client,
+        params={
+            "datetime": "2020-01-01T00:00:00Z/2020-01-03T00:00:00Z",
+            "collections": [collection_id],
+        },
+    )
+    assert feature_ids == {
+        "valid-datetime-item",  # Matches properties__datetime in range
+        "null-datetime-item",  # Matches start_datetime <= lte, end_datetime >= gte
+        "range-item",  # Matches properties__datetime in range
+    }, "Range search failed"
+
+    # Test 3: POST request for range matching null-datetime-item and valid-datetime-item
+    feature_ids = await _search_and_get_ids(
+        app_client,
+        method="post",
+        json={
+            "datetime": "2020-01-01T00:00:00Z/2020-01-02T00:00:00Z",
+            "collections": [collection_id],
+        },
+    )
+    assert feature_ids == {
+        "null-datetime-item",  # Matches start_datetime <= lte, end_datetime >= gte
+        "valid-datetime-item",  # Matches properties__datetime in range
+    }, "POST range search failed"
+
+    # Test 4: Exact datetime matching only range-item's datetime
+    feature_ids = await _search_and_get_ids(
+        app_client,
+        params={
+            "datetime": "2020-01-03T00:00:00Z",
+            "collections": [collection_id],
+        },
+    )
+    assert feature_ids == {
+        "range-item",  # Matches properties__datetime
+    }, "Exact datetime for range-item failed"
+
+    # Test 5: Range matching null-datetime-item but not range-item's datetime
+    feature_ids = await _search_and_get_ids(
+        app_client,
+        params={
+            "datetime": "2020-01-01T12:00:00Z/2020-01-02T12:00:00Z",
+            "collections": [collection_id],
+        },
+    )
+    assert feature_ids == {
+        "null-datetime-item",  # Matches start_datetime <= lte, end_datetime >= gte
+    }, "Range search excluding range-item datetime failed"
+
+    # Cleanup
+    try:
+        await txn_client.delete_collection(test_collection["id"])
+    except Exception as e:
+        logger.warning(f"Failed to delete collection: {e}")