Skip to content

[DE-562] Index Cache Refilling #259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
main
----

* Added new per-operation option `refillIndexCaches` (Python keyword argument `refill_index_caches`) to write operations:

- single-document write operations (insert, replace, update, delete)
- multi-document write operations (insert_many, replace_many, update_many, delete_many)

If the option is set to `True`, new entries are added to in-memory index caches if
document operations affect the edge index or cache-enabled persistent indexes. Every
currently running transaction will keep track of which in-memory index cache entries
were invalidated by the transaction, and will try to (re-)fill them later.

Example:
```python
collection.insert({"foo": "bar"}, refill_index_caches=True)
db.aql.execute("INSERT {foo: 'bar'} INTO collection OPTIONS { refillIndexCaches: true }")
```

7.5.9
-----

* Added cache and primaryKeyCache parameters to the inverted index API.

* Added allow_retry query parameter, making it possible to retry fetching the latest batch from a cursor.
Expand Down
74 changes: 72 additions & 2 deletions arango/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,7 @@ def insert_many(
overwrite_mode: Optional[str] = None,
keep_none: Optional[bool] = None,
merge: Optional[bool] = None,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, List[Union[Json, ArangoServerError]]]]:
"""Insert multiple documents.

Expand Down Expand Up @@ -1518,8 +1519,10 @@ def insert_many(
instead of the new one overwriting the old one. Applies only when
**overwrite_mode** is set to "update" (update-insert).
:type merge: bool | None
:return: Document metadata (e.g. document key, revision) or True if
parameter **silent** was set to True.
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document insertions affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: List of document metadata (e.g. document keys, revisions) and
any exception, or True if parameter **silent** was set to True.
:rtype: [dict | ArangoServerError] | bool
Expand All @@ -1543,6 +1546,10 @@ def insert_many(
if merge is not None:
params["mergeObjects"] = merge

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

request = Request(
method="post",
endpoint=f"/_api/document/{self.name}",
Expand Down Expand Up @@ -1582,6 +1589,7 @@ def update_many(
return_old: bool = False,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, List[Union[Json, ArangoServerError]]]]:
"""Update multiple documents.

Expand Down Expand Up @@ -1624,6 +1632,10 @@ def update_many(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: List of document metadata (e.g. document keys, revisions) and
any exceptions, or True if parameter **silent** was set to True.
:rtype: [dict | ArangoError] | bool
Expand All @@ -1641,6 +1653,10 @@ def update_many(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

documents = [self._ensure_key_in_body(doc) for doc in documents]

request = Request(
Expand Down Expand Up @@ -1753,6 +1769,7 @@ def replace_many(
return_old: bool = False,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, List[Union[Json, ArangoServerError]]]]:
"""Replace multiple documents.

Expand Down Expand Up @@ -1790,6 +1807,10 @@ def replace_many(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: List of document metadata (e.g. document keys, revisions) and
any exceptions, or True if parameter **silent** was set to True.
:rtype: [dict | ArangoServerError] | bool
Expand All @@ -1805,6 +1826,10 @@ def replace_many(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

documents = [self._ensure_key_in_body(doc) for doc in documents]

request = Request(
Expand Down Expand Up @@ -1901,6 +1926,7 @@ def delete_many(
check_rev: bool = True,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, List[Union[Json, ArangoServerError]]]]:
"""Delete multiple documents.

Expand Down Expand Up @@ -1933,6 +1959,10 @@ def delete_many(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: List of document metadata (e.g. document keys, revisions) and
any exceptions, or True if parameter **silent** was set to True.
:rtype: [dict | ArangoServerError] | bool
Expand All @@ -1947,6 +1977,10 @@ def delete_many(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
# Forward the flag under the "refillIndexCaches" query parameter, the same
# key every other write operation in this module sends.
if refill_index_caches is not None:
    params["refillIndexCaches"] = refill_index_caches

documents = [
self._ensure_key_in_body(doc) if isinstance(doc, dict) else doc
for doc in documents
Expand Down Expand Up @@ -2229,6 +2263,7 @@ def insert(
overwrite_mode: Optional[str] = None,
keep_none: Optional[bool] = None,
merge: Optional[bool] = None,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, Json]]:
"""Insert a new document.

Expand Down Expand Up @@ -2263,6 +2298,10 @@ def insert(
instead of the new one overwriting the old one. Applies only when
**overwrite_mode** is set to "update" (update-insert).
:type merge: bool | None
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document insertions affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: Document metadata (e.g. document key, revision) or True if
parameter **silent** was set to True.
:rtype: bool | dict
Expand All @@ -2285,6 +2324,10 @@ def insert(
if merge is not None:
params["mergeObjects"] = merge

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

request = Request(
method="post",
endpoint=f"/_api/document/{self.name}",
Expand Down Expand Up @@ -2317,6 +2360,7 @@ def update(
return_old: bool = False,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, Json]]:
"""Update a document.

Expand All @@ -2343,6 +2387,10 @@ def update(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: Document metadata (e.g. document key, revision) or True if
parameter **silent** was set to True.
:rtype: bool | dict
Expand All @@ -2361,6 +2409,10 @@ def update(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

request = Request(
method="patch",
endpoint=f"/_api/document/{self._extract_id(document)}",
Expand Down Expand Up @@ -2391,6 +2443,7 @@ def replace(
return_old: bool = False,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, Json]]:
"""Replace a document.

Expand All @@ -2412,6 +2465,10 @@ def replace(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: Document metadata (e.g. document key, revision) or True if
parameter **silent** was set to True.
:rtype: bool | dict
Expand All @@ -2428,6 +2485,10 @@ def replace(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

request = Request(
method="put",
endpoint=f"/_api/document/{self._extract_id(document)}",
Expand Down Expand Up @@ -2461,6 +2522,7 @@ def delete(
return_old: bool = False,
sync: Optional[bool] = None,
silent: bool = False,
refill_index_caches: Optional[bool] = None,
) -> Result[Union[bool, Json]]:
"""Delete a document.

Expand All @@ -2485,6 +2547,10 @@ def delete(
:param silent: If set to True, no document metadata is returned. This
can be used to save resources.
:type silent: bool
:param refill_index_caches: Whether to add new entries to in-memory
index caches if document operations affect the edge index or
cache-enabled persistent indexes.
:type refill_index_caches: bool | None
:return: Document metadata (e.g. document key, revision), or True if
parameter **silent** was set to True, or False if document was not
found and **ignore_missing** was set to True (does not apply in
Expand All @@ -2504,6 +2570,10 @@ def delete(
if sync is not None:
params["waitForSync"] = sync

# New in ArangoDB 3.9.6 and 3.10.2
if refill_index_caches is not None:
params["refillIndexCaches"] = refill_index_caches

request = Request(
method="delete",
endpoint=f"/_api/document/{handle}",
Expand Down
43 changes: 37 additions & 6 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ def test_document_insert(col, docs):
)
assert err.value.error_code == 1210

# Test insert with cache refilling
empty_collection(col)
doc = docs[0]
assert col.insert(doc, refill_index_caches=True)
assert col[doc["_key"]]["_key"] == doc["_key"]
assert col[doc["_key"]]["val"] == doc["val"]


def test_document_insert_many(col, bad_col, docs):
# Test insert_many with default options
Expand Down Expand Up @@ -291,6 +298,13 @@ def test_document_insert_many(col, bad_col, docs):
assert results[0].error_code == 1210
assert "new" in results[1]

# Test insert with cache refilling
empty_collection(col)
assert col.insert_many(docs, refill_index_caches=True)
for doc in docs:
assert col[doc["_key"]]["_key"] == doc["_key"]
assert col[doc["_key"]]["val"] == doc["val"]


def test_document_update(col, docs):
doc = docs[0]
Expand Down Expand Up @@ -422,6 +436,11 @@ def test_document_update(col, docs):
assert col.update(doc, silent=True) is True
assert col[doc["_key"]]["val"] == 8

# Test update with cache refilling
doc["val"] = 9
assert col.update(doc, refill_index_caches=True, check_rev=False)
assert col[doc["_key"]]["val"] == 9


def test_document_update_many(col, bad_col, docs):
col.insert_many(docs)
Expand Down Expand Up @@ -600,6 +619,13 @@ def test_document_update_many(col, bad_col, docs):
for doc in docs:
assert col[doc["_key"]]["val"] == 8

# Test update_many with cache refilling
for doc in docs:
doc["val"] = 9
assert col.update_many(docs, refill_index_caches=True, check_rev=False)
for doc in docs:
assert col[doc["_key"]]["val"] == 9

# Test update_many with bad documents
with assert_raises(DocumentParseError) as err:
bad_col.update_many([{}])
Expand Down Expand Up @@ -734,6 +760,11 @@ def test_document_replace(col, docs):
assert col.replace(doc, silent=True) is True
assert col[doc["_key"]]["val"] == 8

# Test replace with cache refilling
doc["val"] = 9
assert col.replace(doc, refill_index_caches=True, check_rev=False)
assert col[doc["_key"]]["val"] == 9


def test_document_replace_many(col, bad_col, docs):
col.insert_many(docs)
Expand Down Expand Up @@ -817,8 +848,8 @@ def test_document_replace_many(col, bad_col, docs):
assert "foo" not in doc
assert doc["baz"] == 4

# Test replace_many with check_rev set to False
results = col.replace_many(docs, check_rev=False)
# Test replace_many with check_rev set to False and cache refilling
results = col.replace_many(docs, check_rev=False, refill_index_caches=True)
for result, doc in zip(results, docs):
doc_key = doc["_key"]
assert result["_id"] == f"{col.name}/{doc_key}"
Expand Down Expand Up @@ -965,9 +996,9 @@ def test_document_delete(col, docs):
if col.context != "transaction":
assert col.delete(bad_key, ignore_missing=True) is False

# Test delete (document) with silent set to True
# Test delete (document) with silent set to True and cache refilling
doc = docs[5]
assert col.delete(doc, silent=True) is True
assert col.delete(doc, silent=True, refill_index_caches=True) is True
assert doc["_key"] not in col
assert len(col) == 1

Expand Down Expand Up @@ -1029,9 +1060,9 @@ def test_document_delete_many(col, bad_col, docs):
old_revs[doc_key] = result["_rev"]
assert len(col) == 0

# Test delete_many with silent set to True
# Test delete_many with silent set to True and cache refilling
col.import_bulk(docs)
assert col.delete_many(docs, silent=True) is True
assert col.delete_many(docs, silent=True, refill_index_caches=True) is True
assert len(col) == 0

# Test delete_many (documents) with check_rev set to True
Expand Down