Skip to content

Commit ace0c7a

Browse files
authored
Make cql2_like_to_es() understand escaped backslashes (#286)
**Related Issue(s):** - Closes #285 **Description:** This is a break/fix PR. The first commit adds a suite of tests that document correct LIKE-to-wildcard query value conversions, and then fixes the `cql2_like_to_es()` code to correctly process escaped backslashes. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) _(`make test` fails because the Docker container port ranges don't match, but CI, which doesn't use `make test`, passes)_ - [ ] Documentation has been updated to reflect changes, if applicable _n/a, no docs in repo_ - [x] Changes are added to the changelog
1 parent 9a51574 commit ace0c7a

File tree

3 files changed

+76
-15
lines changed

3 files changed

+76
-15
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
66
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
77

8+
## [Unreleased]
9+
10+
### Changed
11+
- Support escaped backslashes in CQL2 `LIKE` queries, and reject invalid (or incomplete) escape sequences. [#286](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/286)
12+
813
## [v3.0.0] - 2024-08-14
914

1015
### Changed

stac_fastapi/core/stac_fastapi/core/extensions/filter.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,41 @@
1717
from enum import Enum
1818
from typing import Any, Dict
1919

20+
# Tokens that need translating, tried in this order at each position:
#   1. a backslash plus the character it escapes (DOTALL lets "." match a
#      newline too, so a backslash-newline pair is validated, not skipped),
#   2. a bare CQL2 wildcard ("%" or "_"),
#   3. a dangling backslash at the very end of the input.
_cql2_like_patterns = re.compile(r"\\.|[%_]|\\$", re.DOTALL)

# Every token the pattern above may legally match, mapped to its replacement.
# Any matched token missing from this table is an invalid escape sequence.
_valid_like_substitutions = {
    "\\\\": "\\",
    "\\%": "%",
    "\\_": "_",
    "%": "*",
    "_": "?",
}


def _replace_like_patterns(match: re.Match) -> str:
    """Return the replacement for a single token matched by ``_cql2_like_patterns``.

    Args:
        match (re.Match): A match produced by ``_cql2_like_patterns``.

    Returns:
        str: The substitution text for the matched token.

    Raises:
        ValueError: If the matched token is not a recognised escape sequence
            or wildcard (for example ``\\1`` or a trailing backslash).
    """
    pattern = match.group()
    try:
        return _valid_like_substitutions[pattern]
    except KeyError:
        # The KeyError is an implementation detail; keep it out of the traceback.
        raise ValueError(f"'{pattern}' is not a valid escape sequence") from None


def cql2_like_to_es(string: str) -> str:
    """
    Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters.

    Unescaped ``%`` becomes ``*`` and unescaped ``_`` becomes ``?``. The escape
    sequences ``\\%``, ``\\_`` and ``\\\\`` are reduced to the literal character
    they stand for. All other characters are copied through unchanged.

    NOTE(review): literal ``*``, ``?`` and ``\\`` in the output are not
    re-escaped here; confirm how the target wildcard query interprets them.

    Args:
        string (str): The string containing CQL2 wildcard characters.

    Returns:
        str: The converted string with Elasticsearch compatible wildcards.

    Raises:
        ValueError: If an invalid or incomplete escape sequence is encountered,
            i.e. a backslash followed by anything other than ``%``, ``_`` or
            another backslash (including a newline or the end of the input).
    """
    return _cql2_like_patterns.sub(
        repl=_replace_like_patterns,
        string=string,
    )
4555

4656

4757
class LogicalOp(str, Enum):
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import pytest
2+
3+
from stac_fastapi.core.extensions.filter import cql2_like_to_es
4+
5+
6+
@pytest.mark.parametrize(
    ("cql2_value", "expected_es_value"),
    [
        # Empty input is returned untouched.
        ("", ""),
        # An escaped backslash collapses to a single backslash.
        ("\\\\", "\\"),
        # Percent: bare, escaped, after an escaped backslash, and both escaped.
        ("%", "*"),
        (r"\%", "%"),
        (r"\\%", r"\*"),
        (r"\\\%", r"\%"),
        # Underscore: the same four combinations.
        ("_", "?"),
        (r"\_", "_"),
        (r"\\_", r"\?"),
        (r"\\\_", r"\_"),
    ],
)
def test_cql2_like_to_es_success(cql2_value: str, expected_es_value: str) -> None:
    """Check that each valid CQL2 LIKE value converts to the expected string."""

    converted = cql2_like_to_es(cql2_value)
    assert converted == expected_es_value
29+
30+
31+
@pytest.mark.parametrize(
    "cql2_value",
    [
        pytest.param("\\", id="trailing backslash escape"),
        pytest.param("\\1", id="invalid escape sequence"),
    ],
)
def test_cql2_like_to_es_invalid(cql2_value: str) -> None:
    """Check that incomplete or invalid escape sequences raise ``ValueError``.

    CQL2 does not currently appear to define how invalid escape sequences
    should be handled, so this test assumes such input is rejected.
    """

    with pytest.raises(ValueError):
        cql2_like_to_es(cql2_value)

0 commit comments

Comments
 (0)