From d87e0f3b5685306cb00d913c36a27c27b8baf48f Mon Sep 17 00:00:00 2001
From: Quentin Pradet <quentin.pradet@elastic.co>
Date: Tue, 16 Jan 2024 21:09:53 +0100
Subject: [PATCH 1/5] Cleanup test_serializer.py

---
 test_elasticsearch/test_serializer.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test_elasticsearch/test_serializer.py b/test_elasticsearch/test_serializer.py
index efd56d28f..9dedf31d2 100644
--- a/test_elasticsearch/test_serializer.py
+++ b/test_elasticsearch/test_serializer.py
@@ -36,7 +36,7 @@
 from elasticsearch.serializer import JSONSerializer, TextSerializer
 
 requires_numpy_and_pandas = pytest.mark.skipif(
-    np is None or pd is None, reason="Test requires numpy or pandas to be available"
+    np is None or pd is None, reason="Test requires numpy and pandas to be available"
 )
 
 
@@ -46,11 +46,8 @@ def test_datetime_serialization():
     )
 
 
+@requires_numpy_and_pandas
 def test_decimal_serialization():
-    requires_numpy_and_pandas()
-
-    if sys.version_info[:2] == (2, 6):
-        pytest.skip("Float rounding is broken in 2.6.")
     assert b'{"d":3.8}' == JSONSerializer().dumps({"d": Decimal("3.8")})
 
 

From 48682701d7c2bb5d1b53d08818cda0276b1b7c25 Mon Sep 17 00:00:00 2001
From: Quentin Pradet <quentin.pradet@elastic.co>
Date: Tue, 26 Mar 2024 16:54:30 +0400
Subject: [PATCH 2/5] Add optional orjson serializer support

---
 dev-requirements.txt                  |  1 +
 docs/guide/configuration.asciidoc     | 14 +++++
 elasticsearch/__init__.py             |  6 ++
 elasticsearch/serializer.py           | 11 ++++
 test_elasticsearch/test_serializer.py | 82 ++++++++++++++-------------
 5 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index b5b92c283..77ca77ced 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -18,6 +18,7 @@ nox
 
 numpy
 pandas
+orjson
 
 # Testing the 'search_mvt' API response
 mapbox-vector-tile
diff --git a/docs/guide/configuration.asciidoc b/docs/guide/configuration.asciidoc
index cb467dc0d..3215b35e3 100644
--- a/docs/guide/configuration.asciidoc
+++ b/docs/guide/configuration.asciidoc
@@ -381,6 +381,20 @@ es = Elasticsearch(
 )
 ------------------------------------
 
+If the `orjson` package is installed, you can use the faster ``OrjsonSerializer`` for the default mimetype (``application/json``):
+
+[source,python]
+------------------------------------
+from elasticsearch import Elasticsearch, OrjsonSerializer
+
+es = Elasticsearch(
+    ...,
+    serializer=OrjsonSerializer()
+)
+------------------------------------
+
+It is particularly beneficial to serialize vectors. This will be the default in a future release.
+
 
 [discrete]
 [[nodes]]
diff --git a/elasticsearch/__init__.py b/elasticsearch/__init__.py
index 8b38b3934..0a85f5fb8 100644
--- a/elasticsearch/__init__.py
+++ b/elasticsearch/__init__.py
@@ -62,6 +62,10 @@
     UnsupportedProductError,
 )
 from .serializer import JSONSerializer, JsonSerializer
+try:
+    from .serializer import OrjsonSerializer
+except ModuleNotFoundError:
+    OrjsonSerializer = None
 
 # Only raise one warning per deprecation message so as not
 # to spam up the user if the same action is done multiple times.
@@ -86,6 +90,8 @@
     "UnsupportedProductError",
     "ElasticsearchWarning",
 ]
+if OrjsonSerializer is not None:
+    __all__.append("OrjsonSerializer")
 
 fixup_module_metadata(__name__, globals())
 del fixup_module_metadata
diff --git a/elasticsearch/serializer.py b/elasticsearch/serializer.py
index 758c6b730..baf4e1581 100644
--- a/elasticsearch/serializer.py
+++ b/elasticsearch/serializer.py
@@ -41,6 +41,12 @@
     "MapboxVectorTileSerializer",
 ]
 
+try:
+    from elastic_transport import OrjsonSerializer as _OrjsonSerializer
+    __all__.append("OrjsonSerializer")
+except ModuleNotFoundError:
+    _OrjsonSerializer = None
+
 
 class JsonSerializer(_JsonSerializer):
     mimetype: ClassVar[str] = "application/json"
@@ -73,6 +79,11 @@ def default(self, data: Any) -> Any:
         raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")
 
 
+if _OrjsonSerializer is not None:
+    class OrjsonSerializer(JsonSerializer, _OrjsonSerializer):
+        def default(self, data: Any) -> Any:
+            return JsonSerializer.default(self, data)
+
 class NdjsonSerializer(JsonSerializer, _NdjsonSerializer):
     mimetype: ClassVar[str] = "application/x-ndjson"
 
diff --git a/test_elasticsearch/test_serializer.py b/test_elasticsearch/test_serializer.py
index 9dedf31d2..c94c62677 100644
--- a/test_elasticsearch/test_serializer.py
+++ b/test_elasticsearch/test_serializer.py
@@ -33,38 +33,41 @@
 
 from elasticsearch import Elasticsearch
 from elasticsearch.exceptions import SerializationError
-from elasticsearch.serializer import JSONSerializer, TextSerializer
+from elasticsearch.serializer import JSONSerializer, OrjsonSerializer, TextSerializer
 
 requires_numpy_and_pandas = pytest.mark.skipif(
     np is None or pd is None, reason="Test requires numpy and pandas to be available"
 )
 
+@pytest.fixture(params=[JSONSerializer, OrjsonSerializer])
+def json_serializer(request: pytest.FixtureRequest):
+    yield request.param()
 
-def test_datetime_serialization():
-    assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
+
+def test_datetime_serialization(json_serializer):
+    assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
         {"d": datetime(2010, 10, 1, 2, 30)}
     )
 
 
 @requires_numpy_and_pandas
-def test_decimal_serialization():
-    assert b'{"d":3.8}' == JSONSerializer().dumps({"d": Decimal("3.8")})
+def test_decimal_serialization(json_serializer):
+    assert b'{"d":3.8}' == json_serializer.dumps({"d": Decimal("3.8")})
 
 
-def test_uuid_serialization():
-    assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == JSONSerializer().dumps(
+def test_uuid_serialization(json_serializer):
+    assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == json_serializer.dumps(
         {"d": uuid.UUID("00000000-0000-0000-0000-000000000003")}
     )
 
 
 @requires_numpy_and_pandas
-def test_serializes_numpy_bool():
-    assert b'{"d":true}' == JSONSerializer().dumps({"d": np.bool_(True)})
+def test_serializes_numpy_bool(json_serializer):
+    assert b'{"d":true}' == json_serializer.dumps({"d": np.bool_(True)})
 
 
 @requires_numpy_and_pandas
-def test_serializes_numpy_integers():
-    ser = JSONSerializer()
+def test_serializes_numpy_integers(json_serializer):
     for np_type in (
         np.int_,
         np.int8,
@@ -72,7 +75,7 @@ def test_serializes_numpy_integers():
         np.int32,
         np.int64,
     ):
-        assert ser.dumps({"d": np_type(-1)}) == b'{"d":-1}'
+        assert json_serializer.dumps({"d": np_type(-1)}) == b'{"d":-1}'
 
     for np_type in (
         np.uint8,
@@ -80,91 +83,92 @@ def test_serializes_numpy_integers():
         np.uint32,
         np.uint64,
     ):
-        assert ser.dumps({"d": np_type(1)}) == b'{"d":1}'
+        assert json_serializer.dumps({"d": np_type(1)}) == b'{"d":1}'
 
 
 @requires_numpy_and_pandas
-def test_serializes_numpy_floats():
-    ser = JSONSerializer()
+def test_serializes_numpy_floats(json_serializer):
     for np_type in (
         np.float_,
         np.float32,
         np.float64,
     ):
-        assert re.search(rb'^{"d":1\.2[\d]*}$', ser.dumps({"d": np_type(1.2)}))
+        assert re.search(rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)}))
 
 
 @requires_numpy_and_pandas
-def test_serializes_numpy_datetime():
-    assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
+def test_serializes_numpy_datetime(json_serializer):
+    assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
         {"d": np.datetime64("2010-10-01T02:30:00")}
     )
 
 
 @requires_numpy_and_pandas
-def test_serializes_numpy_ndarray():
-    assert b'{"d":[0,0,0,0,0]}' == JSONSerializer().dumps(
+def test_serializes_numpy_ndarray(json_serializer):
+    assert b'{"d":[0,0,0,0,0]}' == json_serializer.dumps(
         {"d": np.zeros((5,), dtype=np.uint8)}
     )
     # This isn't useful for Elasticsearch, just want to make sure it works.
-    assert b'{"d":[[0,0],[0,0]]}' == JSONSerializer().dumps(
+    assert b'{"d":[[0,0],[0,0]]}' == json_serializer.dumps(
         {"d": np.zeros((2, 2), dtype=np.uint8)}
     )
 
 
 @requires_numpy_and_pandas
 def test_serializes_numpy_nan_to_nan():
-    assert b'{"d":NaN}' == JSONSerializer().dumps({"d": np.nan})
+    assert b'{"d":NaN}' == JSONSerializer().dumps({"d": float("NaN")})
+    # NaN is invalid JSON, and orjson silently converts it to null
+    assert b'{"d":null}' == OrjsonSerializer().dumps({"d": float("NaN")})
 
 
 @requires_numpy_and_pandas
-def test_serializes_pandas_timestamp():
-    assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
+def test_serializes_pandas_timestamp(json_serializer):
+    assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
         {"d": pd.Timestamp("2010-10-01T02:30:00")}
     )
 
 
 @requires_numpy_and_pandas
-def test_serializes_pandas_series():
-    assert b'{"d":["a","b","c","d"]}' == JSONSerializer().dumps(
+def test_serializes_pandas_series(json_serializer):
+    assert b'{"d":["a","b","c","d"]}' == json_serializer.dumps(
         {"d": pd.Series(["a", "b", "c", "d"])}
     )
 
 
 @requires_numpy_and_pandas
 @pytest.mark.skipif(not hasattr(pd, "NA"), reason="pandas.NA is required")
-def test_serializes_pandas_na():
-    assert b'{"d":null}' == JSONSerializer().dumps({"d": pd.NA})
+def test_serializes_pandas_na(json_serializer):
+    assert b'{"d":null}' == json_serializer.dumps({"d": pd.NA})
 
 
 @requires_numpy_and_pandas
 @pytest.mark.skipif(not hasattr(pd, "NaT"), reason="pandas.NaT required")
-def test_raises_serialization_error_pandas_nat():
+def test_raises_serialization_error_pandas_nat(json_serializer):
     with pytest.raises(SerializationError):
-        JSONSerializer().dumps({"d": pd.NaT})
+        json_serializer.dumps({"d": pd.NaT})
 
 
 @requires_numpy_and_pandas
-def test_serializes_pandas_category():
+def test_serializes_pandas_category(json_serializer):
     cat = pd.Categorical(["a", "c", "b", "a"], categories=["a", "b", "c"])
-    assert b'{"d":["a","c","b","a"]}' == JSONSerializer().dumps({"d": cat})
+    assert b'{"d":["a","c","b","a"]}' == json_serializer.dumps({"d": cat})
 
     cat = pd.Categorical([1, 2, 3], categories=[1, 2, 3])
-    assert b'{"d":[1,2,3]}' == JSONSerializer().dumps({"d": cat})
+    assert b'{"d":[1,2,3]}' == json_serializer.dumps({"d": cat})
 
 
-def test_json_raises_serialization_error_on_dump_error():
+def test_json_raises_serialization_error_on_dump_error(json_serializer):
     with pytest.raises(SerializationError):
-        JSONSerializer().dumps(object())
+        json_serializer.dumps(object())
 
 
-def test_raises_serialization_error_on_load_error():
+def test_raises_serialization_error_on_load_error(json_serializer):
     with pytest.raises(SerializationError):
-        JSONSerializer().loads(object())
+        json_serializer.loads(object())
     with pytest.raises(SerializationError):
-        JSONSerializer().loads("")
+        json_serializer.loads("")
     with pytest.raises(SerializationError):
-        JSONSerializer().loads("{{")
+        json_serializer.loads("{{")
 
 
 def test_strings_are_left_untouched():

From 72ea9951ec4caa935a159b3fe567df8accf6f211 Mon Sep 17 00:00:00 2001
From: Quentin Pradet <quentin.pradet@elastic.co>
Date: Tue, 26 Mar 2024 17:13:57 +0400
Subject: [PATCH 3/5] Fix lint

---
 elasticsearch/__init__.py             | 3 ++-
 elasticsearch/serializer.py           | 5 ++++-
 noxfile.py                            | 2 +-
 setup.py                              | 1 +
 test_elasticsearch/test_serializer.py | 6 ++++--
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/elasticsearch/__init__.py b/elasticsearch/__init__.py
index 0a85f5fb8..f4ac43ca7 100644
--- a/elasticsearch/__init__.py
+++ b/elasticsearch/__init__.py
@@ -62,10 +62,13 @@
     UnsupportedProductError,
 )
 from .serializer import JSONSerializer, JsonSerializer
+
 try:
     from .serializer import OrjsonSerializer
-except ModuleNotFoundError:
-    OrjsonSerializer = None
+except ImportError:
+    # .serializer only defines OrjsonSerializer when orjson is available, so a
+    # missing name raises plain ImportError (not ModuleNotFoundError) here.
+    OrjsonSerializer = None  # type: ignore[assignment,misc]
 
 # Only raise one warning per deprecation message so as not
 # to spam up the user if the same action is done multiple times.
diff --git a/elasticsearch/serializer.py b/elasticsearch/serializer.py
index baf4e1581..4e6122fff 100644
--- a/elasticsearch/serializer.py
+++ b/elasticsearch/serializer.py
@@ -43,9 +43,12 @@
 
 try:
     from elastic_transport import OrjsonSerializer as _OrjsonSerializer
+
     __all__.append("OrjsonSerializer")
-except ModuleNotFoundError:
-    _OrjsonSerializer = None
+except ImportError:
+    # ImportError also covers an elastic_transport that does not export
+    # OrjsonSerializer; ModuleNotFoundError alone would let that case crash.
+    _OrjsonSerializer = None  # type: ignore[assignment,misc]
 
 
 class JsonSerializer(_JsonSerializer):
@@ -80,10 +83,12 @@ def default(self, data: Any) -> Any:
 
 
 if _OrjsonSerializer is not None:
+
     class OrjsonSerializer(JsonSerializer, _OrjsonSerializer):
         def default(self, data: Any) -> Any:
             return JsonSerializer.default(self, data)
 
+
 class NdjsonSerializer(JsonSerializer, _NdjsonSerializer):
     mimetype: ClassVar[str] = "application/x-ndjson"
 
diff --git a/noxfile.py b/noxfile.py
index b166bf79a..54d8adcff 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -92,7 +92,7 @@ def lint(session):
     session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
 
     # Workaround to make '-r' to still work despite uninstalling aiohttp below.
-    session.install(".[async,requests]", env=INSTALL_ENV)
+    session.install(".[async,requests,orjson]", env=INSTALL_ENV)
 
     # Run mypy on the package and then the type examples separately for
     # the two different mypy use-cases, ourselves and our users.
diff --git a/setup.py b/setup.py
index 7a146da9b..6104768d5 100644
--- a/setup.py
+++ b/setup.py
@@ -97,5 +97,6 @@
     extras_require={
         "requests": ["requests>=2.4.0, <3.0.0"],
         "async": async_requires,
+        "orjson": ["orjson>=3"],
     },
 )
diff --git a/test_elasticsearch/test_serializer.py b/test_elasticsearch/test_serializer.py
index c94c62677..9dc5f31e9 100644
--- a/test_elasticsearch/test_serializer.py
+++ b/test_elasticsearch/test_serializer.py
@@ -16,7 +16,6 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-import sys
 import uuid
 from datetime import datetime
 from decimal import Decimal
@@ -39,6 +38,7 @@
     np is None or pd is None, reason="Test requires numpy and pandas to be available"
 )
 
+
 @pytest.fixture(params=[JSONSerializer, OrjsonSerializer])
 def json_serializer(request: pytest.FixtureRequest):
     yield request.param()
@@ -93,7 +93,9 @@ def test_serializes_numpy_floats(json_serializer):
         np.float32,
         np.float64,
     ):
-        assert re.search(rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)}))
+        assert re.search(
+            rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)})
+        )
 
 
 @requires_numpy_and_pandas

From 56236008a1db5d3336579719c0afe96c2a6d296d Mon Sep 17 00:00:00 2001
From: Quentin Pradet <quentin.pradet@elastic.co>
Date: Tue, 26 Mar 2024 17:14:06 +0400
Subject: [PATCH 4/5] Improve docs

---
 docs/guide/configuration.asciidoc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/guide/configuration.asciidoc b/docs/guide/configuration.asciidoc
index 3215b35e3..06964dac9 100644
--- a/docs/guide/configuration.asciidoc
+++ b/docs/guide/configuration.asciidoc
@@ -393,8 +393,12 @@ es = Elasticsearch(
 )
 ------------------------------------
 
-It is particularly beneficial to serialize vectors. This will be the default in a future release.
+orjson is particularly fast when serializing vectors as it has native NumPy support. `OrjsonSerializer` will be the default in a future release. Note that you can install orjson with the `orjson` extra:
 
+[source,sh]
+--------------------------------------------
+$ python -m pip install elasticsearch[orjson]
+--------------------------------------------
 
 [discrete]
 [[nodes]]

From b3ffd73ca0a912d022dfd1994f4a5f7447a64774 Mon Sep 17 00:00:00 2001
From: Quentin Pradet <quentin.pradet@elastic.co>
Date: Tue, 26 Mar 2024 17:40:21 +0400
Subject: [PATCH 5/5] Add orjson extra in tests

---
 noxfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index 54d8adcff..bf88249bd 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -48,7 +48,7 @@ def pytest_argv():
 
 @nox.session(python=["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"])
 def test(session):
-    session.install(".[async,requests]", env=INSTALL_ENV, silent=False)
+    session.install(".[async,requests,orjson]", env=INSTALL_ENV, silent=False)
     session.install("-r", "dev-requirements.txt", silent=False)
 
     session.run(*pytest_argv())