From 7de3854fdc7f20c72b2c454ce285e81d201aaa52 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 10 Apr 2025 11:13:49 +0100 Subject: [PATCH] use internal aggregation range class for date aggregation ranges --- elasticsearch/dsl/query.py | 2 +- elasticsearch/dsl/types.py | 62 ++++++++++++++++++++++++++++++++++---- utils/dsl-generator.py | 12 ++++---- 3 files changed, 63 insertions(+), 13 deletions(-) diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py index 1282d3b02..b2f4eb3cc 100644 --- a/elasticsearch/dsl/query.py +++ b/elasticsearch/dsl/query.py @@ -1084,7 +1084,7 @@ class Knn(Query): :arg similarity: The minimum similarity for a vector to be considered a match :arg rescore_vector: Apply oversampling and rescoring to quantized - vectors * + vectors :arg boost: Floating point number used to decrease or increase the relevance scores of the query. Boost values are relative to the default value of 1.0. A boost value between 0 and 1.0 decreases diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 772e596cd..9dba981e4 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -324,31 +324,58 @@ class DenseVectorIndexOptions(AttrDict[Any]): `int4_flat` index types. :arg ef_construction: The number of candidates to track while assembling the list of nearest neighbors for each new node. Only - applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. - Defaults to `100` if omitted. + applicable to `hnsw`, `int8_hnsw`, `bbq_hnsw`, and `int4_hnsw` + index types. Defaults to `100` if omitted. :arg m: The number of neighbors each node will be connected to in the - HNSW graph. Only applicable to `hnsw`, `int8_hnsw`, and - `int4_hnsw` index types. Defaults to `16` if omitted. + HNSW graph. Only applicable to `hnsw`, `int8_hnsw`, `bbq_hnsw`, + and `int4_hnsw` index types. Defaults to `16` if omitted. + :arg rescore_vector: The rescore vector options. This is only + applicable to `bbq_hnsw`, `int4_hnsw`, `int8_hnsw`, `bbq_flat`, + `int4_flat`, and `int8_flat` index types. """ type: Union[ - Literal["flat", "hnsw", "int4_flat", "int4_hnsw", "int8_flat", "int8_hnsw"], + Literal[ + "bbq_flat", + "bbq_hnsw", + "flat", + "hnsw", + "int4_flat", + "int4_hnsw", + "int8_flat", + "int8_hnsw", + ], DefaultType, ] confidence_interval: Union[float, DefaultType] ef_construction: Union[int, DefaultType] m: Union[int, DefaultType] + rescore_vector: Union[ + "DenseVectorIndexOptionsRescoreVector", Dict[str, Any], DefaultType + ] def __init__( self, *, type: Union[ - Literal["flat", "hnsw", "int4_flat", "int4_hnsw", "int8_flat", "int8_hnsw"], + Literal[ + "bbq_flat", + "bbq_hnsw", + "flat", + "hnsw", + "int4_flat", + "int4_hnsw", + "int8_flat", + "int8_hnsw", + ], DefaultType, ] = DEFAULT, confidence_interval: Union[float, DefaultType] = DEFAULT, ef_construction: Union[int, DefaultType] = DEFAULT, m: Union[int, DefaultType] = DEFAULT, + rescore_vector: Union[ + "DenseVectorIndexOptionsRescoreVector", Dict[str, Any], DefaultType + ] = DEFAULT, **kwargs: Any, ): if type is not DEFAULT: @@ -359,6 +386,29 @@ def __init__( kwargs["ef_construction"] = ef_construction if m is not DEFAULT: kwargs["m"] = m + if rescore_vector is not DEFAULT: + kwargs["rescore_vector"] = rescore_vector + super().__init__(kwargs) + + +class DenseVectorIndexOptionsRescoreVector(AttrDict[Any]): + """ + :arg oversample: (required) The oversampling factor to use when + searching for the nearest neighbor. This is only applicable to the + quantized formats: `bbq_*`, `int4_*`, and `int8_*`. When provided, + `oversample * k` vectors will be gathered and then their scores + will be re-computed with the original vectors. valid values are + between `1.0` and `10.0` (inclusive), or `0` exactly to disable + oversampling. + """ + + oversample: Union[float, DefaultType] + + def __init__( + self, *, oversample: Union[float, DefaultType] = DEFAULT, **kwargs: Any + ): + if oversample is not DEFAULT: + kwargs["oversample"] = oversample super().__init__(kwargs) diff --git a/utils/dsl-generator.py b/utils/dsl-generator.py index 2aa12c53d..f18991dfc 100644 --- a/utils/dsl-generator.py +++ b/utils/dsl-generator.py @@ -345,12 +345,12 @@ def get_python_type(self, schema_type, for_response=False): ]["name"].endswith("Analyzer"): # not expanding analyzers at this time, maybe in the future return "str, Dict[str, Any]", None - elif ( - schema_type["name"]["namespace"] == "_types.aggregations" - and schema_type["name"]["name"].endswith("AggregationRange") - and schema_type["name"]["name"] != "IpRangeAggregationRange" - ): - return '"wrappers.AggregationRange"', None + elif schema_type["name"]["namespace"] == "_types.aggregations": + if ( + schema_type["name"]["name"].endswith("AggregationRange") + or schema_type["name"]["name"] == "DateRangeExpression" + ) and schema_type["name"]["name"] != "IpRangeAggregationRange": + return '"wrappers.AggregationRange"', None # to handle other interfaces we generate a type of the same name # and add the interface to the interfaces.py module