From 53c27b931593b667eac7890e8d3aa573eb4e0b0c Mon Sep 17 00:00:00 2001 From: Anthony Shaw Date: Wed, 29 Jan 2025 09:34:18 +1100 Subject: [PATCH 1/3] Use builtin array to convert f32 bytes to float list --- src/openai/resources/embeddings.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 382a42340e..98f554f0b1 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -2,6 +2,7 @@ from __future__ import annotations +import array import base64 from typing import List, Union, Iterable, cast from typing_extensions import Literal @@ -113,8 +114,8 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue + # numpy is not installed / use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] base64.b64decode(data), dtype="float32" @@ -226,8 +227,8 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue + # numpy is not installed / use array for base64 optimisation + embedding.embedding = array.array("f", base64.b64decode(data)).tolist() embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] base64.b64decode(data), dtype="float32" From 8c8c1c00c57899cfdaf19665bb861e9ef2cc4fe4 Mon Sep 17 00:00:00 2001 From: Anthony Shaw Date: Wed, 29 Jan 2025 09:47:56 +1100 Subject: [PATCH 2/3] adjust logic when you don't have numpy and haven't overridden the encoding format --- src/openai/resources/embeddings.py | 12 
++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 98f554f0b1..3b08414d5d 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -103,7 +103,7 @@ def create( "dimensions": dimensions, "encoding_format": encoding_format, } - if not is_given(encoding_format) and has_numpy(): + if not is_given(encoding_format): params["encoding_format"] = "base64" def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: @@ -114,7 +114,9 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / use array for base64 optimisation + continue + if not has_numpy(): + # use array for base64 optimisation embedding.embedding = array.array("f", base64.b64decode(data)).tolist() embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] @@ -216,7 +218,7 @@ async def create( "dimensions": dimensions, "encoding_format": encoding_format, } - if not is_given(encoding_format) and has_numpy(): + if not is_given(encoding_format): params["encoding_format"] = "base64" def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: @@ -227,7 +229,9 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: for embedding in obj.data: data = cast(object, embedding.embedding) if not isinstance(data, str): - # numpy is not installed / use array for base64 optimisation + continue + if not has_numpy(): + # use array for base64 optimisation embedding.embedding = array.array("f", base64.b64decode(data)).tolist() embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] From 966c4f92dd8a83b2cbda0cd1f85f242fe7a6e191 Mon Sep 17 00:00:00 2001 From: Anthony Shaw Date: Wed, 29 Jan 2025 10:00:05 +1100 Subject: [PATCH 3/3] adjust conditional flow --- src/openai/resources/embeddings.py | 16 
++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 3b08414d5d..a392d5eb17 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -118,10 +118,10 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: if not has_numpy(): # use array for base64 optimisation embedding.embedding = array.array("f", base64.b64decode(data)).tolist() - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() return obj @@ -233,10 +233,10 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: if not has_numpy(): # use array for base64 optimisation embedding.embedding = array.array("f", base64.b64decode(data)).tolist() - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() + else: + embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] + base64.b64decode(data), dtype="float32" + ).tolist() return obj