Skip to content

Commit ba11e0c

Browse files
authored
Merge pull request #178 from jamesfisher-gis/like_filter
LIKE, IN, BETWEEN operators -- filter extension
2 parents 61c01cb + e7330dc commit ba11e0c

File tree

8 files changed

+306
-3
lines changed

8 files changed

+306
-3
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
99

1010
### Added
1111

12+
- Advanced comparison (LIKE, IN, BETWEEN) operators to the Filter extension [#178](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/178)
13+
1214
### Changed
1315

1416
- Elasticsearch drivers from 7.17.9 to 8.11.0 [#169](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/169)

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
settings = ElasticsearchSettings()
2525
session = Session.create_from_settings(settings)
2626

27+
filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient())
28+
filter_extension.conformance_classes.append(
29+
"http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators"
30+
)
31+
2732
extensions = [
2833
TransactionExtension(client=TransactionsClient(session=session), settings=settings),
2934
BulkTransactionExtension(client=BulkTransactionsClient(session=session)),
@@ -32,7 +37,7 @@
3237
SortExtension(),
3338
TokenPaginationExtension(),
3439
ContextExtension(),
35-
FilterExtension(client=EsAsyncBaseFiltersClient()),
40+
filter_extension,
3641
]
3742

3843
post_request_model = create_post_request_model(extensions)

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/extensions/filter.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
Basic CQL2 (AND, OR, NOT), comparison operators (=, <>, <, <=, >, >=), and IS NULL.
55
The comparison operators are allowed against string, numeric, boolean, date, and datetime types.
66
7+
Advanced comparison operators (http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators)
8+
defines the LIKE, IN, and BETWEEN operators.
9+
710
Basic Spatial Operators (http://www.opengis.net/spec/cql2/1.0/conf/basic-spatial-operators)
811
defines the intersects operator (S_INTERSECTS).
912
"""
1013
from __future__ import annotations
1114

1215
import datetime
16+
import re
1317
from enum import Enum
1418
from typing import List, Union
1519

@@ -78,6 +82,17 @@ def to_es(self):
7882
)
7983

8084

85+
class AdvancedComparisonOp(str, Enum):
    """CQL2 advanced comparison operators: LIKE, BETWEEN and IN.

    Each member's value is the lowercase operator name ("like", "between",
    "in"). Because the enum also derives from ``str``, members compare equal
    to those plain strings.
    """

    # Pattern match.
    like = "like"
    # Range test.
    between = "between"
    # Membership test; `in` is a reserved word in Python, hence the leading
    # underscore on the member name (the value is still "in").
    _in = "in"
94+
95+
8196
class SpatialIntersectsOp(str, Enum):
8297
"""Spatial intersections operator s_intersects."""
8398

@@ -152,8 +167,8 @@ def validate(cls, v):
152167
class Clause(BaseModel):
153168
"""Filter extension clause."""
154169

155-
op: Union[LogicalOp, ComparisonOp, SpatialIntersectsOp]
156-
args: List[Arg]
170+
op: Union[LogicalOp, ComparisonOp, AdvancedComparisonOp, SpatialIntersectsOp]
171+
args: List[Union[Arg, List[Arg]]]
157172

158173
def to_es(self):
159174
"""Generate an Elasticsearch expression for this Clause."""
@@ -171,6 +186,30 @@ def to_es(self):
171186
"must_not": [{"term": {to_es(self.args[0]): to_es(self.args[1])}}]
172187
}
173188
}
189+
elif self.op == AdvancedComparisonOp.like:
190+
return {
191+
"wildcard": {
192+
to_es(self.args[0]): {
193+
"value": cql2_like_to_es(str(to_es(self.args[1]))),
194+
"case_insensitive": "false",
195+
}
196+
}
197+
}
198+
elif self.op == AdvancedComparisonOp.between:
199+
return {
200+
"range": {
201+
to_es(self.args[0]): {
202+
"gte": to_es(self.args[1]),
203+
"lte": to_es(self.args[2]),
204+
}
205+
}
206+
}
207+
elif self.op == AdvancedComparisonOp._in:
208+
if not isinstance(self.args[1], List):
209+
raise RuntimeError(f"Arg {self.args[1]} is not a list")
210+
return {
211+
"terms": {to_es(self.args[0]): [to_es(arg) for arg in self.args[1]]}
212+
}
174213
elif (
175214
self.op == ComparisonOp.lt
176215
or self.op == ComparisonOp.lte
@@ -210,3 +249,19 @@ def to_es(arg: Arg):
210249
return arg
211250
else:
212251
raise RuntimeError(f"unknown arg {repr(arg)}")
252+
253+
254+
def cql2_like_to_es(string):
    """Translate a CQL2 LIKE pattern into an Elasticsearch wildcard pattern.

    Rules applied to ``string``:

    * an unescaped ``%`` becomes ``*``;
    * an unescaped ``_`` becomes ``?``;
    * a backslash directly before ``_`` or ``%`` is dropped, leaving the
      literal ``_`` or ``%`` in the result;
    * every other character, including ``*`` and ``?`` already present in
      the input, is passed through unchanged.

    Returns the converted string.
    """

    def _translate(match):
        # Group 1 is set only for the escaped form (backslash + '_' or '%'):
        # keep the character and drop the backslash.
        literal = match.group(1)
        if literal is not None:
            return literal
        # Otherwise the match is a bare CQL2 wildcard character.
        return "*" if match.group(0) == "%" else "?"

    # The escaped alternative is listed first so a backslash always consumes
    # the wildcard character that follows it.
    return re.sub(r"\\([_%])|[%_]", _translate, string)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"op": "like",
3+
"args": [
4+
{
5+
"property": "scene_id"
6+
},
7+
"LC82030282019133%"
8+
]
9+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"op": "like",
3+
"args": [
4+
{
5+
"property": "scene_id"
6+
},
7+
"LC82030282019133LGN0_"
8+
]
9+
}
10+
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"op": "and",
3+
"args": [
4+
{
5+
"op": "between",
6+
"args": [
7+
{
8+
"property": "cloud_cover"
9+
},
10+
0.1,
11+
0.2
12+
]
13+
},
14+
{
15+
"op": "=",
16+
"args": [
17+
{
18+
"property": "landsat:wrs_row"
19+
},
20+
28
21+
]
22+
},
23+
{
24+
"op": "=",
25+
"args": [
26+
{
27+
"property": "landsat:wrs_path"
28+
},
29+
203
30+
]
31+
}
32+
]
33+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"op": "and",
3+
"args": [
4+
{
5+
"op": "in",
6+
"args": [
7+
{"property": "id"},
8+
["LC08_L1TP_060247_20180905_20180912_01_T1_L1TP"]
9+
]
10+
},
11+
{"op": "=", "args": [{"property": "collection"}, "landsat8_l1tp"]}
12+
]
13+
}

stac_fastapi/elasticsearch/tests/extensions/test_filter.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,179 @@ async def test_search_filter_extension_floats_post(app_client, ctx):
213213

214214
assert resp.status_code == 200
215215
assert len(resp.json()["features"]) == 1
216+
217+
218+
@pytest.mark.asyncio
async def test_search_filter_extension_wildcard_cql2(app_client, ctx):
    """POST /search with LIKE patterns using the CQL2 wildcards '_' and '%'.

    Expects a 200 response containing exactly one feature.
    """
    item_id = ctx.item["id"]

    def like_id(pattern):
        # LIKE clause against the item's id property.
        return {"op": "like", "args": [{"property": "id"}, pattern]}

    clauses = [
        {"op": "=", "args": [{"property": "id"}, item_id]},
        # '_' replaces the final character of the id.
        like_id(item_id[:-1] + "_"),
        # '%' replaces the final three characters of the id.
        like_id(item_id[:-3] + "%"),
    ]

    resp = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": clauses}}
    )

    assert resp.status_code == 200
    assert len(resp.json()["features"]) == 1
250+
251+
252+
@pytest.mark.asyncio
async def test_search_filter_extension_wildcard_es(app_client, ctx):
    """POST /search with LIKE patterns using the wildcards '?' and '*'.

    Expects a 200 response containing exactly one feature.
    """
    item_id = ctx.item["id"]

    def like_id(pattern):
        # LIKE clause against the item's id property.
        return {"op": "like", "args": [{"property": "id"}, pattern]}

    clauses = [
        {"op": "=", "args": [{"property": "id"}, item_id]},
        # '?' replaces the final character of the id.
        like_id(item_id[:-1] + "?"),
        # '*' replaces the final three characters of the id.
        like_id(item_id[:-3] + "*"),
    ]

    resp = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": clauses}}
    )

    assert resp.status_code == 200
    assert len(resp.json()["features"]) == 1
284+
285+
286+
@pytest.mark.asyncio
async def test_search_filter_extension_escape_chars(app_client, ctx):
    """POST /search with a LIKE pattern that mixes escaped and bare '_'.

    Expects a 200 response containing exactly one feature.
    """
    product_id = ctx.item["properties"]["landsat:product_id"]
    # Backslash-escape every underscore, drop the last character of the
    # escaped string, then append a bare '_' in its place.
    escaped_pattern = product_id.replace("_", "\\_")[:-1] + "_"

    same_item = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    like_product = {
        "op": "like",
        "args": [{"property": "properties.landsat:product_id"}, escaped_pattern],
    }

    resp = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": [same_item, like_product]}}
    )

    assert resp.status_code == 200
    assert len(resp.json()["features"]) == 1
312+
313+
314+
@pytest.mark.asyncio
async def test_search_filter_extension_in(app_client, ctx):
    """POST /search with an IN clause whose second argument is a list.

    Expects a 200 response containing exactly one feature.
    """
    product_id = ctx.item["properties"]["landsat:product_id"]

    same_item = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    product_in = {
        "op": "in",
        # The candidate values are wrapped in a one-element list.
        "args": [{"property": "properties.landsat:product_id"}, [product_id]],
    }

    resp = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": [same_item, product_in]}}
    )

    assert resp.status_code == 200
    assert len(resp.json()["features"]) == 1
338+
339+
340+
@pytest.mark.asyncio
async def test_search_filter_extension_in_no_list(app_client, ctx):
    """POST /search with an IN clause whose second argument is not a list.

    Expects a 400 response whose detail reports that the argument is not a
    list.
    """
    product_id = ctx.item["properties"]["landsat:product_id"]

    same_item = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    product_in = {
        "op": "in",
        # A bare value is passed here instead of a list.
        "args": [{"property": "properties.landsat:product_id"}, product_id],
    }

    resp = await app_client.post(
        "/search", json={"filter": {"op": "and", "args": [same_item, product_in]}}
    )

    expected_body = {
        "detail": f"Error with cql2_json filter: Arg {product_id} is not a list"
    }
    assert resp.status_code == 400
    assert resp.json() == expected_body
366+
367+
368+
@pytest.mark.asyncio
async def test_search_filter_extension_between(app_client, ctx):
    """POST /search with a BETWEEN clause bracketing the item's sun elevation.

    Expects a 200 response containing exactly one feature.
    """
    sun_elevation = ctx.item["properties"]["view:sun_elevation"]

    same_item = {"op": "=", "args": [{"property": "id"}, ctx.item["id"]]}
    elevation_between = {
        "op": "between",
        "args": [
            {"property": "properties.view:sun_elevation"},
            # Bounds sit 0.01 either side of the item's own value.
            sun_elevation - 0.01,
            sun_elevation + 0.01,
        ],
    }

    resp = await app_client.post(
        "/search",
        json={"filter": {"op": "and", "args": [same_item, elevation_between]}},
    )

    assert resp.status_code == 200
    assert len(resp.json()["features"]) == 1

0 commit comments

Comments
 (0)