Skip to content

[DE-558] ArangoSearch column cache #258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
main
----

* Added cache and primaryKeyCache parameters to the inverted index API.

* Added allow_retry query parameter, making it possible to retry fetching the latest batch from a cursor.

* Added OverloadControlDatabase, enabling the client to react effectively to potential server overloads.
Expand Down
35 changes: 25 additions & 10 deletions arango/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,8 @@ def add_inverted_index(
includeAllFields: Optional[bool] = None,
trackListPositions: Optional[bool] = None,
searchField: Optional[bool] = None,
primaryKeyCache: Optional[bool] = None,
cache: Optional[bool] = None,
) -> Result[Json]:
"""Create a new inverted index, introduced in version 3.10.

Expand All @@ -1351,22 +1353,31 @@ def add_inverted_index(
:type name: str | None
:param inBackground: Do not hold the collection lock.
:type inBackground: bool | None
:param parallelism:
:param parallelism: The number of threads to use for indexing the fields.
:type parallelism: int | None
:param primarySort:
:type primarySort: Json | None
:param storedValues:
:param primarySort: Primary sort order to enable an AQL optimization.
:type primarySort: Optional[Json]
:param storedValues: An array of objects with paths to additional
attributes to store in the index.
:type storedValues: Sequence[Json] | None
:param analyzer:
:type analyzer: str | None
:param features:
:param analyzer: Analyzer to use by default.
:type analyzer: Optional[str]
:param features: List of Analyzer features.
:type features: Sequence[str] | None
:param includeAllFields:
:param includeAllFields: This option only applies if you use the
inverted index in search-alias views.
:type includeAllFields: bool | None
:param trackListPositions:
:param trackListPositions: This option only applies if you use the
inverted index in search-alias views, and searchField is true.
:type trackListPositions: bool | None
:param searchField:
:param searchField: This option only applies if you use the inverted
index in search-alias views.
:type searchField: bool | None
:param primaryKeyCache: Always cache the primary key column in memory.
:type primaryKeyCache: bool | None
:param cache: Always cache the field normalization values in memory
for all fields by default.
:type cache: bool | None
:return: New index details.
:rtype: dict
:raise arango.exceptions.IndexCreateError: If create fails.
Expand Down Expand Up @@ -1395,6 +1406,10 @@ def add_inverted_index(
data["searchField"] = searchField
if fields is not None:
data["fields"] = fields
if primaryKeyCache is not None:
data["primaryKeyCache"] = primaryKeyCache
if cache is not None:
data["cache"] = cache

return self._add_index(data)

Expand Down
40 changes: 40 additions & 0 deletions arango/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,40 @@ def format_index(body: Json) -> Json:
result["legacyPolygons"] = body["legacyPolygons"]
if "estimates" in body:
result["estimates"] = body["estimates"]
if "analyzer" in body:
result["analyzer"] = body["analyzer"]
if "cleanupIntervalStep" in body:
result["cleanup_interval_step"] = body["cleanupIntervalStep"]
if "commitIntervalMsec" in body:
result["commit_interval_msec"] = body["commitIntervalMsec"]
if "consolidationIntervalMsec" in body:
result["consolidation_interval_msec"] = body["consolidationIntervalMsec"]
if "consolidationPolicy" in body:
result["consolidation_policy"] = format_view_consolidation_policy(
body["consolidationPolicy"]
)
if "features" in body:
result["features"] = body["features"]
if "includeAllFields" in body:
result["include_all_fields"] = body["includeAllFields"]
if "primarySort" in body:
result["primary_sort"] = body["primarySort"]
if "searchField" in body:
result["search_field"] = body["searchField"]
if "trackListPositions" in body:
result["track_list_positions"] = body["trackListPositions"]
if "version" in body:
result["version"] = body["version"]
if "cache" in body:
result["cache"] = body["cache"]
if "primaryKeyCache" in body:
result["primaryKeyCache"] = body["primaryKeyCache"]
if "writebufferIdle" in body:
result["writebuffer_idle"] = body["writebufferIdle"]
if "writebufferActive" in body:
result["writebuffer_active"] = body["writebufferActive"]
if "writebufferSizeMax" in body:
result["writebuffer_max_size"] = body["writebufferSizeMax"]

return verify_format(body, result)

Expand Down Expand Up @@ -902,6 +936,12 @@ def format_view(body: Json) -> Json:
if "indexes" in body:
result["indexes"] = body["indexes"]

# Introduced in 3.9.6 EE
if "primaryKeyCache" in body:
result["primaryKeyCache"] = body["primaryKeyCache"]
if "primarySortCache" in body:
result["primarySortCache"] = body["primarySortCache"]

# Introduced in 3.12 EE
if "optimizeTopK" in body:
result["optimizeTopK"] = body["optimizeTopK"]
Expand Down
31 changes: 31 additions & 0 deletions tests/test_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import pytest
from packaging import version

from arango.exceptions import (
IndexCreateError,
IndexDeleteError,
Expand Down Expand Up @@ -197,6 +200,34 @@ def test_add_ttl_index(icol):
icol.delete_index(result["id"])


def test_add_inverted_index(icol, enterprise, db_version):
    """Create an inverted index and check the keys of the returned details.

    The test is skipped on servers older than 3.10.0. The index is created
    with per-field, stored-values and primary-sort ``cache`` flags; when
    running against an Enterprise server of version 3.10.2 or later, the
    index-level ``cache`` and ``primaryKeyCache`` options are passed as well
    and are expected to appear in the result. The index is deleted at the
    end of the test.
    """
    if db_version < version.parse("3.10.0"):
        pytest.skip("Inverted indexes are not supported before 3.10.0")

    index_options = {
        "fields": [{"name": "attr1", "cache": True}],
        "name": "c0_cached",
        "storedValues": [{"fields": ["a"], "compression": "lz4", "cache": True}],
        "includeAllFields": True,
        "analyzer": "identity",
        "primarySort": {
            "cache": True,
            "fields": [{"field": "a", "direction": "asc"}],
        },
    }
    required_keys = ["primary_sort", "analyzer", "include_all_fields", "search_field"]

    # Index-level cache options are only sent (and only expected back) on
    # Enterprise servers from 3.10.2 onwards.
    if enterprise and db_version >= version.parse("3.10.2"):
        index_options.update(cache=True, primaryKeyCache=True)
        required_keys += ["cache", "primaryKeyCache"]

    index = icol.add_inverted_index(**index_options)
    known_ids = extract("id", icol.indexes())
    assert index["id"] in known_ids

    for required in required_keys:
        assert required in index

    icol.delete_index(index["id"])


def test_delete_index(icol, bad_col):
old_indexes = set(extract("id", icol.indexes()))
icol.add_hash_index(["attr3", "attr4"], unique=True)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def test_arangosearch_view_properties(db, col, enterprise, db_version):
}
)

if db_version >= version.parse("3.9.6"):
params.update({"primarySortCache": True, "primaryKeyCache": True})
if db_version >= version.parse("3.10.3"):
params.update({"storedValues": ["attr1", "attr2"]})

Expand Down