Skip to content

Commit 4d3a160

Browse files
author
Phil Varner
committed
Use the concatenation of the Item id and Collection id as the Elasticsearch _id value
1 parent 8c16cfa commit 4d3a160

File tree

6 files changed

+77
-48
lines changed

6 files changed

+77
-48
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ docker-shell:
2525

2626
.PHONY: test
2727
test:
28-
$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd /app/stac_fastapi/elasticsearch/tests/ && pytest'
28+
-$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd /app/stac_fastapi/elasticsearch/tests/ && pytest'
2929
docker-compose down
3030

3131
.PHONY: run-database

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,11 @@
3333
COLLECTIONS_INDEX = "stac_collections"
3434

3535

36+
def mk_item_id(item_id: str, collection_id: str):
37+
"""Make the Elasticsearch document _id value from the Item id and collection."""
38+
return f"{item_id}|{collection_id}"
39+
40+
3641
@attr.s
3742
class CoreCrudClient(BaseCoreClient):
3843
"""Client for core endpoints defined by stac."""
@@ -142,7 +147,9 @@ def get_item(self, item_id: str, collection_id: str, **kwargs) -> Item:
142147
"""Get item by item id, collection id."""
143148
base_url = str(kwargs["request"].base_url)
144149
try:
145-
item = self.client.get(index=ITEMS_INDEX, id=item_id)
150+
item = self.client.get(
151+
index=ITEMS_INDEX, id=mk_item_id(item_id, collection_id)
152+
)
146153
except elasticsearch.exceptions.NotFoundError:
147154
raise NotFoundError(
148155
f"Item {item_id} does not exist in Collection {collection_id}"

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py

Lines changed: 58 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from overrides import overrides
1111

1212
from stac_fastapi.elasticsearch.config import ElasticsearchSettings
13-
from stac_fastapi.elasticsearch.core import COLLECTIONS_INDEX, ITEMS_INDEX
13+
from stac_fastapi.elasticsearch.core import COLLECTIONS_INDEX, ITEMS_INDEX, mk_item_id
1414
from stac_fastapi.elasticsearch.serializers import CollectionSerializer, ItemSerializer
1515
from stac_fastapi.elasticsearch.session import Session
1616
from stac_fastapi.extensions.third_party.bulk_transactions import (
@@ -42,29 +42,39 @@ def create_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
4242
if item["type"] == "FeatureCollection":
4343
bulk_client = BulkTransactionsClient()
4444
processed_items = [
45-
bulk_client._preprocess_item(item, base_url)
46-
for item in item["features"]
45+
bulk_client.preprocess_item(item, base_url) for item in item["features"]
4746
]
4847
return_msg = f"Successfully added {len(processed_items)} items."
4948
bulk_client.bulk_sync(processed_items)
5049

5150
return return_msg
52-
53-
# If a single item is posted
54-
if not self.client.exists(index=COLLECTIONS_INDEX, id=item["collection"]):
55-
raise ForeignKeyError(f"Collection {item['collection']} does not exist")
56-
57-
if self.client.exists(index=ITEMS_INDEX, id=item["id"]):
58-
raise ConflictError(
59-
f"Item {item['id']} in collection {item['collection']} already exists"
51+
else:
52+
# TODO
53+
if self.client.exists(
54+
index=ITEMS_INDEX, id=mk_item_id(item["id"], item["collection"])
55+
):
56+
raise ConflictError(
57+
f"Item {item['id']} in collection {item['collection']} already exists"
58+
)
59+
60+
# todo: check if collection exists, but cache
61+
if not self.client.exists(index=COLLECTIONS_INDEX, id=item["collection"]):
62+
raise ForeignKeyError(f"Collection {item['collection']} does not exist")
63+
64+
item = BulkTransactionsClient().preprocess_item(item, base_url)
65+
66+
es_resp = self.client.index(
67+
index=ITEMS_INDEX,
68+
id=mk_item_id(item["id"], item["collection"]),
69+
document=item,
6070
)
6171

62-
data = ItemSerializer.stac_to_db(item, base_url)
72+
if (meta := es_resp.get("meta")) and meta.get("status") == 409:
73+
raise ConflictError(
74+
f"Item {item['id']} in collection {item['collection']} already exists"
75+
)
6376

64-
self.client.index(
65-
index=ITEMS_INDEX, doc_type="_doc", id=item["id"], document=data
66-
)
67-
return ItemSerializer.db_to_stac(item, base_url)
77+
return item
6878

6979
@overrides
7080
def update_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
@@ -75,14 +85,11 @@ def update_item(self, item: stac_types.Item, **kwargs) -> stac_types.Item:
7585

7686
if not self.client.exists(index=COLLECTIONS_INDEX, id=item["collection"]):
7787
raise ForeignKeyError(f"Collection {item['collection']} does not exist")
78-
if not self.client.exists(index=ITEMS_INDEX, id=item["id"]):
79-
raise NotFoundError(
80-
f"Item {item['id']} in collection {item['collection']} doesn't exist"
81-
)
88+
89+
# todo: index instead of delete and create
8290
self.delete_item(item["id"], item["collection"])
8391
self.create_item(item, **kwargs)
84-
# self.client.update(index=ITEMS_INDEX,doc_type='_doc',id=model["id"],
85-
# body=model)
92+
# self.client.update(index=ITEMS_INDEX,id=item["id"], body=item)
8693
return ItemSerializer.db_to_stac(item, base_url)
8794

8895
@overrides
@@ -91,10 +98,12 @@ def delete_item(
9198
) -> stac_types.Item:
9299
"""Delete item."""
93100
try:
94-
_ = self.client.get(index=ITEMS_INDEX, id=item_id)
101+
self.client.delete(index=ITEMS_INDEX, id=mk_item_id(item_id, collection_id))
95102
except elasticsearch.exceptions.NotFoundError:
96-
raise NotFoundError(f"Item {item_id} not found")
97-
self.client.delete(index=ITEMS_INDEX, doc_type="_doc", id=item_id)
103+
raise NotFoundError(
104+
f"Item {item_id} in collection {collection_id} not found"
105+
)
106+
return None
98107

99108
@overrides
100109
def create_collection(
@@ -109,9 +118,9 @@ def create_collection(
109118

110119
if self.client.exists(index=COLLECTIONS_INDEX, id=collection["id"]):
111120
raise ConflictError(f"Collection {collection['id']} already exists")
121+
112122
self.client.index(
113123
index=COLLECTIONS_INDEX,
114-
doc_type="_doc",
115124
id=collection["id"],
116125
document=collection,
117126
)
@@ -139,7 +148,8 @@ def delete_collection(self, collection_id: str, **kwargs) -> stac_types.Collecti
139148
_ = self.client.get(index=COLLECTIONS_INDEX, id=collection_id)
140149
except elasticsearch.exceptions.NotFoundError:
141150
raise NotFoundError(f"Collection {collection_id} not found")
142-
self.client.delete(index=COLLECTIONS_INDEX, doc_type="_doc", id=collection_id)
151+
self.client.delete(index=COLLECTIONS_INDEX, id=collection_id)
152+
return None
143153

144154

145155
@attr.s
@@ -153,38 +163,45 @@ def __attrs_post_init__(self):
153163
settings = ElasticsearchSettings()
154164
self.client = settings.create_client
155165

156-
def _preprocess_item(self, model: stac_types.Item, base_url) -> stac_types.Item:
166+
def preprocess_item(self, item: stac_types.Item, base_url) -> stac_types.Item:
157167
"""Preprocess items to match data model."""
158-
if not self.client.exists(index=COLLECTIONS_INDEX, id=model["collection"]):
159-
raise ForeignKeyError(f"Collection {model['collection']} does not exist")
168+
if not self.client.exists(index=COLLECTIONS_INDEX, id=item["collection"]):
169+
raise ForeignKeyError(f"Collection {item['collection']} does not exist")
160170

161-
if self.client.exists(index=ITEMS_INDEX, id=model["id"]):
171+
if self.client.exists(index=ITEMS_INDEX, id=item["id"]):
162172
raise ConflictError(
163-
f"Item {model['id']} in collection {model['collection']} already exists"
173+
f"Item {item['id']} in collection {item['collection']} already exists"
164174
)
165175

166-
item = ItemSerializer.stac_to_db(model, base_url)
167-
return item
176+
return ItemSerializer.stac_to_db(item, base_url)
168177

169178
def bulk_sync(self, processed_items):
170179
"""Elasticsearch bulk insertion."""
171-
actions = [{"_index": ITEMS_INDEX, "_source": item} for item in processed_items]
180+
actions = [
181+
{
182+
"_index": ITEMS_INDEX,
183+
"_id": mk_item_id(item["id"], item["collection"]),
184+
"_source": item,
185+
}
186+
for item in processed_items
187+
]
172188
helpers.bulk(self.client, actions)
173189

174190
@overrides
175191
def bulk_item_insert(
176192
self, items: Items, chunk_size: Optional[int] = None, **kwargs
177193
) -> str:
178194
"""Bulk item insertion using es."""
179-
try:
180-
base_url = str(kwargs["request"].base_url)
181-
except Exception:
195+
request = kwargs.get("request")
196+
if request:
197+
base_url = str(request.base_url)
198+
else:
182199
base_url = ""
200+
183201
processed_items = [
184-
self._preprocess_item(item, base_url) for item in items.items.values()
202+
self.preprocess_item(item, base_url) for item in items.items.values()
185203
]
186-
return_msg = f"Successfully added {len(processed_items)} items."
187204

188205
self.bulk_sync(processed_items)
189206

190-
return return_msg
207+
return f"Successfully added {len(processed_items)} Items."

stac_fastapi/elasticsearch/tests/clients/test_elasticsearch.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -100,19 +100,19 @@ def test_get_item(
100100
item_data = load_test_data("test_item.json")
101101
es_transactions.create_collection(collection_data, request=MockStarletteRequest)
102102
es_transactions.create_item(item_data, request=MockStarletteRequest)
103-
coll = es_core.get_item(
103+
got_item = es_core.get_item(
104104
item_id=item_data["id"],
105105
collection_id=item_data["collection"],
106106
request=MockStarletteRequest,
107107
)
108-
assert coll["id"] == item_data["id"]
109-
assert coll["collection"] == item_data["collection"]
108+
assert got_item["id"] == item_data["id"]
109+
assert got_item["collection"] == item_data["collection"]
110110

111111
es_transactions.delete_collection(
112112
collection_data["id"], request=MockStarletteRequest
113113
)
114114
es_transactions.delete_item(
115-
item_data["id"], coll["id"], request=MockStarletteRequest
115+
item_data["id"], item_data["collection"], request=MockStarletteRequest
116116
)
117117

118118

stac_fastapi/elasticsearch/tests/conftest.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,10 @@ def app_client(api_client, load_test_data):
153153
try:
154154
client.create_collection(coll, request=MockStarletteRequest)
155155
except ConflictError:
156-
pass
156+
try:
157+
client.delete_item("test-item", "test-collection")
158+
except Exception:
159+
pass
157160

158161
with TestClient(api_client.app) as test_app:
159162
yield test_app

stac_fastapi/elasticsearch/tests/resources/test_item.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ def test_update_item_missing_collection(app_client, load_test_data):
143143
def test_update_item_geometry(app_client, load_test_data):
144144
test_item = load_test_data("test_item.json")
145145

146+
test_item["id"] = "update_test_item_1"
147+
146148
# Create the item
147149
resp = app_client.post(
148150
f"/collections/{test_item['collection']}/items", json=test_item

0 commit comments

Comments
 (0)