Skip to content

Commit 25ebdc4

Browse files
author
Phil Varner
committed
improve elasticsearch mappings
1 parent 8fb8a8e commit 25ebdc4

File tree

5 files changed

+145
-70
lines changed

5 files changed

+145
-70
lines changed

.pre-commit-config.yaml

Lines changed: 51 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,53 @@
11
repos:
2-
- repo: https://github.com/PyCQA/isort
3-
rev: 5.8.0
4-
hooks:
5-
- id: isort
6-
language_version: python3.8
7-
-
8-
repo: https://github.com/psf/black
9-
rev: 20.8b1
10-
hooks:
11-
- id: black
12-
args: ['--safe']
13-
language_version: python3.8
14-
-
15-
repo: https://gitlab.com/pycqa/flake8
16-
rev: 3.9.0
17-
hooks:
18-
- id: flake8
19-
language_version: python3.8
20-
args: [
21-
# E501 let black handle all line length decisions
22-
# W503 black conflicts with "line break before operator" rule
23-
# E203 black conflicts with "whitespace before ':'" rule
24-
'--ignore=E501,W503,E203,C901']
25-
-
26-
repo: https://github.com/chewse/pre-commit-mirrors-pydocstyle
27-
# 2.1.1
28-
rev: v2.1.1
29-
hooks:
30-
- id: pydocstyle
31-
language_version: python3.8
32-
exclude: '.*(test|alembic|scripts).*'
33-
args: [
34-
# Check for docstring presence only
35-
'--select=D1',
2+
- repo: https://github.com/PyCQA/isort
3+
rev: 5.8.0
4+
hooks:
5+
- id: isort
6+
language_version: python3.8
7+
- repo: https://github.com/psf/black
8+
rev: 20.8b1
9+
hooks:
10+
- id: black
11+
args: [ '--safe' ]
12+
language_version: python3.8
13+
- repo: https://gitlab.com/pycqa/flake8
14+
rev: 3.9.0
15+
hooks:
16+
- id: flake8
17+
language_version: python3.8
18+
args: [
19+
# E501 let black handle all line length decisions
20+
# W503 black conflicts with "line break before operator" rule
21+
# E203 black conflicts with "whitespace before ':'" rule
22+
'--ignore=E501,W503,E203,C901' ]
23+
- repo: https://github.com/chewse/pre-commit-mirrors-pydocstyle
24+
# 2.1.1
25+
rev: v2.1.1
26+
hooks:
27+
- id: pydocstyle
28+
language_version: python3.8
29+
exclude: '.*(test|alembic|scripts).*'
30+
args: [
31+
# Check for docstring presence only
32+
'--select=D1',
3633

37-
]
38-
# Don't require docstrings for tests
39-
# '--match=(?!test).*\.py']
40-
# -
41-
# repo: https://github.com/pre-commit/mirrors-mypy
42-
# rev: v0.770
43-
# hooks:
44-
# - id: mypy
45-
# language_version: python3.8
46-
# args: [--no-strict-optional, --ignore-missing-imports]
47-
-
48-
repo: https://github.com/PyCQA/pydocstyle
49-
rev: 6.0.0
50-
hooks:
51-
- id: pydocstyle
52-
language_version: python3.8
53-
exclude: '.*(test|alembic|scripts).*'
54-
#args: [
55-
# Don't require docstrings for tests
56-
#'--match=(?!test|alembic|scripts).*\.py',
57-
#]
34+
]
35+
# Don't require docstrings for tests
36+
# '--match=(?!test).*\.py']
37+
# -
38+
# repo: https://github.com/pre-commit/mirrors-mypy
39+
# rev: v0.770
40+
# hooks:
41+
# - id: mypy
42+
# language_version: python3.8
43+
# args: [--no-strict-optional, --ignore-missing-imports]
44+
- repo: https://github.com/PyCQA/pydocstyle
45+
rev: 6.0.0
46+
hooks:
47+
- id: pydocstyle
48+
language_version: python3.8
49+
exclude: '.*(test|alembic|scripts).*'
50+
#args: [
51+
# Don't require docstrings for tests
52+
#'--match=(?!test|alembic|scripts).*\.py',
53+
#]

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ Prior to commit, run:
1414
pre-commit run --all-files
1515
```
1616

17+
```shell
18+
cd stac_fastapi/elasticsearch
19+
pip install .[dev]
20+
```
21+
1722
## Building
1823

1924
```
@@ -37,3 +42,7 @@ make test
3742
```
3843
make ingest
3944
```
45+
46+
## Elasticsearch Mappings
47+
48+
Mappings apply to the search index, not to the stored document source.

stac_fastapi/elasticsearch/setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
"pre-commit",
2828
"requests",
2929
"ciso8601",
30+
"overrides",
31+
"black",
3032
],
3133
"docs": ["mkdocs", "mkdocs-material", "pdocs"],
3234
"server": ["uvicorn[standard]>=0.12.0,<0.14.0"],

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/serializers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class ItemSerializer(Serializer):
2424
"""Serialization methods for STAC items."""
2525

2626
@classmethod
27-
def stac_to_db(cls, stac_data: TypedDict, base_url: str) -> stac_types.Item:
27+
def stac_to_db(cls, stac_data: stac_types.Item, base_url: str) -> stac_types.Item:
2828
"""Transform STAC Item to database-ready STAC Item."""
2929
item_links = ItemLinks(
3030
collection_id=stac_data["collection"],

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/transactions.py

Lines changed: 82 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import attr
77
import elasticsearch
88
from elasticsearch import helpers
9+
from overrides import overrides
910

1011
from stac_fastapi.elasticsearch.config import ElasticsearchSettings
1112
from stac_fastapi.elasticsearch.serializers import CollectionSerializer, ItemSerializer
@@ -30,26 +31,87 @@ class TransactionsClient(BaseTransactionsClient):
3031
settings = ElasticsearchSettings()
3132
client = settings.create_client
3233

33-
def _create_item_index(self):
34-
mapping = {
35-
"mappings": {
36-
"properties": {
37-
"geometry": {"type": "geo_shape"},
38-
"id": {"type": "text", "fields": {"keyword": {"type": "keyword"}}},
39-
"properties__datetime": {
40-
"type": "text",
41-
"fields": {"keyword": {"type": "keyword"}},
42-
},
43-
}
34+
dynamicTemplates = [
35+
# Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md
36+
{
37+
"descriptions": {
38+
"match_mapping_type": "string",
39+
"match": "description",
40+
"mapping": {"type": "text"},
41+
}
42+
},
43+
{
44+
"titles": {
45+
"match_mapping_type": "string",
46+
"match": "title",
47+
"mapping": {"type": "text"},
48+
}
49+
},
50+
# Projection Extension https://github.com/stac-extensions/projection
51+
{"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}},
52+
{
53+
"proj_projjson": {
54+
"match": "proj:projjson",
55+
"mapping": {"type": "object", "enabled": False},
56+
}
57+
},
58+
{
59+
"proj_centroid": {
60+
"match_mapping_type": "string",
61+
"match": "proj:centroid",
62+
"mapping": {"type": "geo_point"},
4463
}
45-
}
64+
},
65+
{
66+
"proj_geometry": {
67+
"match_mapping_type": "string",
68+
"match": "proj:geometry",
69+
"mapping": {"type": "geo_shape"},
70+
}
71+
},
72+
{
73+
"no_index_href": {
74+
"match": "href",
75+
"mapping": {"type": "text", "index": False},
76+
}
77+
},
78+
# Default all other strings not otherwise specified to keyword
79+
{"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}},
80+
{"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}},
81+
]
82+
83+
mappings = {
84+
"numeric_detection": False,
85+
"dynamic_templates": dynamicTemplates,
86+
"properties": {
87+
"geometry": {"type": "geo_shape"},
88+
"assets": {"type": "object", "enabled": False},
89+
"links": {"type": "object", "enabled": False},
90+
"properties": {
91+
"type": "object",
92+
"properties": {
93+
# Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md
94+
"datetime": {"type": "date"},
95+
"start_datetime": {"type": "date"},
96+
"end_datetime": {"type": "date"},
97+
"created": {"type": "date"},
98+
"updated": {"type": "date"},
99+
# Satellite Extension https://github.com/stac-extensions/sat
100+
"sat:absolute_orbit": {"type": "integer"},
101+
"sat:relative_orbit": {"type": "integer"},
102+
},
103+
},
104+
},
105+
}
46106

47-
_ = self.client.indices.create(
107+
def _create_item_index(self):
108+
self.client.indices.create(
48109
index="stac_items",
49-
body=mapping,
110+
body=self.mappings,
50111
ignore=400, # ignore 400 already exists code
51112
)
52113

114+
@overrides
53115
def create_item(self, model: stac_types.Item, **kwargs):
54116
"""Create item."""
55117
base_url = str(kwargs["request"].base_url)
@@ -83,6 +145,7 @@ def create_item(self, model: stac_types.Item, **kwargs):
83145
)
84146
return ItemSerializer.db_to_stac(model, base_url)
85147

148+
@overrides
86149
def create_collection(self, model: stac_types.Collection, **kwargs):
87150
"""Create collection."""
88151
base_url = str(kwargs["request"].base_url)
@@ -100,6 +163,7 @@ def create_collection(self, model: stac_types.Collection, **kwargs):
100163
)
101164
return CollectionSerializer.db_to_stac(model, base_url)
102165

166+
@overrides
103167
def update_item(self, model: stac_types.Item, **kwargs):
104168
"""Update item."""
105169
base_url = str(kwargs["request"].base_url)
@@ -118,6 +182,7 @@ def update_item(self, model: stac_types.Item, **kwargs):
118182
# body=model)
119183
return ItemSerializer.db_to_stac(model, base_url)
120184

185+
@overrides
121186
def update_collection(self, model: stac_types.Collection, **kwargs):
122187
"""Update collection."""
123188
base_url = str(kwargs["request"].base_url)
@@ -130,6 +195,7 @@ def update_collection(self, model: stac_types.Collection, **kwargs):
130195

131196
return CollectionSerializer.db_to_stac(model, base_url)
132197

198+
@overrides
133199
def delete_item(self, item_id: str, collection_id: str, **kwargs):
134200
"""Delete item."""
135201
try:
@@ -138,6 +204,7 @@ def delete_item(self, item_id: str, collection_id: str, **kwargs):
138204
raise NotFoundError(f"Item {item_id} not found")
139205
self.client.delete(index="stac_items", doc_type="_doc", id=item_id)
140206

207+
@overrides
141208
def delete_collection(self, collection_id: str, **kwargs):
142209
"""Delete collection."""
143210
try:
@@ -178,6 +245,7 @@ def bulk_sync(self, processed_items):
178245
]
179246
helpers.bulk(self.client, actions)
180247

248+
@overrides
181249
def bulk_item_insert(self, items: Items, **kwargs) -> str:
182250
"""Bulk item insertion using es."""
183251
transactions_client = TransactionsClient()

0 commit comments

Comments
 (0)