Skip to content

Commit 6889515

Browse files
pquentin authored and github-actions[bot] committed
Add optional orjson serializer support (#2493)
* Cleanup test_serializer.py
* Add optional orjson serializer support
* Fix lint
* Improve docs
* Add orjson extra in tests

(cherry picked from commit bfe7edd)
1 parent 3f40047 commit 6889515

File tree

7 files changed

+91
-47
lines changed

7 files changed

+91
-47
lines changed

dev-requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ nox
1818

1919
numpy
2020
pandas
21+
orjson
2122

2223
# Testing the 'search_mvt' API response
2324
mapbox-vector-tile

docs/guide/configuration.asciidoc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,24 @@ client = Elasticsearch(
381381
)
382382
------------------------------------
383383

384+
If the `orjson` package is installed, you can use the faster `OrjsonSerializer` for the default mimetype (`application/json`):
385+
386+
[source,python]
387+
------------------------------------
388+
from elasticsearch import Elasticsearch, OrjsonSerializer
389+
390+
es = Elasticsearch(
391+
...,
392+
serializer=OrjsonSerializer()
393+
)
394+
------------------------------------
395+
396+
orjson is particularly fast when serializing vectors, as it has native NumPy support. It will be the default in a future release. Note that you can install orjson with the `orjson` extra:
397+
398+
[source,sh]
399+
--------------------------------------------
400+
$ python -m pip install elasticsearch[orjson]
401+
--------------------------------------------
384402

385403
[discrete]
386404
[[nodes]]

elasticsearch/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
)
6464
from .serializer import JSONSerializer, JsonSerializer
6565

66+
# OrjsonSerializer is optional: the .serializer module only defines the class
# when its orjson-backed base class could be imported. When the class is
# missing, ``from .serializer import OrjsonSerializer`` fails with a plain
# ImportError ("cannot import name ..."), which is NOT a ModuleNotFoundError,
# so the broader ImportError has to be caught to keep ``import elasticsearch``
# working without orjson installed.
try:
    from .serializer import OrjsonSerializer
except ImportError:
    # Sentinel checked further down before the name is added to __all__.
    OrjsonSerializer = None  # type: ignore[assignment,misc]
70+
6671
# Only raise one warning per deprecation message so as not
6772
# to spam up the user if the same action is done multiple times.
6873
warnings.simplefilter("default", category=ElasticsearchWarning, append=True)
@@ -86,6 +91,8 @@
8691
"UnsupportedProductError",
8792
"ElasticsearchWarning",
8893
]
94+
if OrjsonSerializer is not None:
95+
__all__.append("OrjsonSerializer")
8996

9097
fixup_module_metadata(__name__, globals())
9198
del fixup_module_metadata

elasticsearch/serializer.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@
4141
"MapboxVectorTileSerializer",
4242
]
4343

44+
# The orjson-backed serializer is an optional feature. Catch ImportError rather
# than only ModuleNotFoundError: if elastic_transport is installed but does not
# expose OrjsonSerializer, ``from elastic_transport import OrjsonSerializer``
# raises a plain ImportError ("cannot import name ..."), which a
# ModuleNotFoundError handler would let propagate.
try:
    from elastic_transport import OrjsonSerializer as _OrjsonSerializer

    # Only advertise the public name when the base class is really available.
    __all__.append("OrjsonSerializer")
except ImportError:
    # Sentinel: the OrjsonSerializer subclass is only defined when this is not None.
    _OrjsonSerializer = None  # type: ignore[assignment,misc]
50+
4451

4552
class JsonSerializer(_JsonSerializer):
4653
mimetype: ClassVar[str] = "application/json"
@@ -73,6 +80,13 @@ def default(self, data: Any) -> Any:
7380
raise TypeError(f"Unable to serialize {data!r} (type: {type(data)})")
7481

7582

83+
if _OrjsonSerializer is not None:
84+
85+
class OrjsonSerializer(JsonSerializer, _OrjsonSerializer):
86+
def default(self, data: Any) -> Any:
87+
return JsonSerializer.default(self, data)
88+
89+
7690
class NdjsonSerializer(JsonSerializer, _NdjsonSerializer):
7791
mimetype: ClassVar[str] = "application/x-ndjson"
7892

noxfile.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def pytest_argv():
4848

4949
@nox.session(python=["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"])
5050
def test(session):
51-
session.install(".[async,requests]", env=INSTALL_ENV, silent=False)
51+
session.install(".[async,requests,orjson]", env=INSTALL_ENV, silent=False)
5252
session.install("-r", "dev-requirements.txt", silent=False)
5353

5454
session.run(*pytest_argv())
@@ -92,7 +92,7 @@ def lint(session):
9292
session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
9393

9494
# Workaround to make '-r' to still work despite uninstalling aiohttp below.
95-
session.install(".[async,requests]", env=INSTALL_ENV)
95+
session.install(".[async,requests,orjson]", env=INSTALL_ENV)
9696

9797
# Run mypy on the package and then the type examples separately for
9898
# the two different mypy use-cases, ourselves and our users.

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,5 +97,6 @@
9797
extras_require={
9898
"requests": ["requests>=2.4.0, <3.0.0"],
9999
"async": async_requires,
100+
"orjson": ["orjson>=3"],
100101
},
101102
)

test_elasticsearch/test_serializer.py

Lines changed: 48 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
import sys
2019
import uuid
2120
from datetime import datetime
2221
from decimal import Decimal
@@ -33,141 +32,145 @@
3332

3433
from elasticsearch import Elasticsearch
3534
from elasticsearch.exceptions import SerializationError
36-
from elasticsearch.serializer import JSONSerializer, TextSerializer
35+
from elasticsearch.serializer import JSONSerializer, OrjsonSerializer, TextSerializer
3736

3837
requires_numpy_and_pandas = pytest.mark.skipif(
39-
np is None or pd is None, reason="Test requires numpy or pandas to be available"
38+
np is None or pd is None, reason="Test requires numpy and pandas to be available"
4039
)
4140

4241

43-
def test_datetime_serialization():
44-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
42+
@pytest.fixture(params=[JSONSerializer, OrjsonSerializer])
43+
def json_serializer(request: pytest.FixtureRequest):
44+
yield request.param()
45+
46+
47+
def test_datetime_serialization(json_serializer):
48+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
4549
{"d": datetime(2010, 10, 1, 2, 30)}
4650
)
4751

4852

49-
def test_decimal_serialization():
50-
requires_numpy_and_pandas()
51-
52-
if sys.version_info[:2] == (2, 6):
53-
pytest.skip("Float rounding is broken in 2.6.")
54-
assert b'{"d":3.8}' == JSONSerializer().dumps({"d": Decimal("3.8")})
53+
@requires_numpy_and_pandas
54+
def test_decimal_serialization(json_serializer):
55+
assert b'{"d":3.8}' == json_serializer.dumps({"d": Decimal("3.8")})
5556

5657

57-
def test_uuid_serialization():
58-
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == JSONSerializer().dumps(
58+
def test_uuid_serialization(json_serializer):
59+
assert b'{"d":"00000000-0000-0000-0000-000000000003"}' == json_serializer.dumps(
5960
{"d": uuid.UUID("00000000-0000-0000-0000-000000000003")}
6061
)
6162

6263

6364
@requires_numpy_and_pandas
64-
def test_serializes_numpy_bool():
65-
assert b'{"d":true}' == JSONSerializer().dumps({"d": np.bool_(True)})
65+
def test_serializes_numpy_bool(json_serializer):
66+
assert b'{"d":true}' == json_serializer.dumps({"d": np.bool_(True)})
6667

6768

6869
@requires_numpy_and_pandas
69-
def test_serializes_numpy_integers():
70-
ser = JSONSerializer()
70+
def test_serializes_numpy_integers(json_serializer):
7171
for np_type in (
7272
np.int_,
7373
np.int8,
7474
np.int16,
7575
np.int32,
7676
np.int64,
7777
):
78-
assert ser.dumps({"d": np_type(-1)}) == b'{"d":-1}'
78+
assert json_serializer.dumps({"d": np_type(-1)}) == b'{"d":-1}'
7979

8080
for np_type in (
8181
np.uint8,
8282
np.uint16,
8383
np.uint32,
8484
np.uint64,
8585
):
86-
assert ser.dumps({"d": np_type(1)}) == b'{"d":1}'
86+
assert json_serializer.dumps({"d": np_type(1)}) == b'{"d":1}'
8787

8888

8989
@requires_numpy_and_pandas
90-
def test_serializes_numpy_floats():
91-
ser = JSONSerializer()
90+
def test_serializes_numpy_floats(json_serializer):
9291
for np_type in (
9392
np.float_,
9493
np.float32,
9594
np.float64,
9695
):
97-
assert re.search(rb'^{"d":1\.2[\d]*}$', ser.dumps({"d": np_type(1.2)}))
96+
assert re.search(
97+
rb'^{"d":1\.2[\d]*}$', json_serializer.dumps({"d": np_type(1.2)})
98+
)
9899

99100

100101
@requires_numpy_and_pandas
101-
def test_serializes_numpy_datetime():
102-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
102+
def test_serializes_numpy_datetime(json_serializer):
103+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
103104
{"d": np.datetime64("2010-10-01T02:30:00")}
104105
)
105106

106107

107108
@requires_numpy_and_pandas
108-
def test_serializes_numpy_ndarray():
109-
assert b'{"d":[0,0,0,0,0]}' == JSONSerializer().dumps(
109+
def test_serializes_numpy_ndarray(json_serializer):
110+
assert b'{"d":[0,0,0,0,0]}' == json_serializer.dumps(
110111
{"d": np.zeros((5,), dtype=np.uint8)}
111112
)
112113
# This isn't useful for Elasticsearch, just want to make sure it works.
113-
assert b'{"d":[[0,0],[0,0]]}' == JSONSerializer().dumps(
114+
assert b'{"d":[[0,0],[0,0]]}' == json_serializer.dumps(
114115
{"d": np.zeros((2, 2), dtype=np.uint8)}
115116
)
116117

117118

118119
@requires_numpy_and_pandas
119120
def test_serializes_numpy_nan_to_nan():
120-
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": np.nan})
121+
assert b'{"d":NaN}' == JSONSerializer().dumps({"d": float("NaN")})
122+
# NaN is invalid JSON, and orjson silently converts it to null
123+
assert b'{"d":null}' == OrjsonSerializer().dumps({"d": float("NaN")})
121124

122125

123126
@requires_numpy_and_pandas
124-
def test_serializes_pandas_timestamp():
125-
assert b'{"d":"2010-10-01T02:30:00"}' == JSONSerializer().dumps(
127+
def test_serializes_pandas_timestamp(json_serializer):
128+
assert b'{"d":"2010-10-01T02:30:00"}' == json_serializer.dumps(
126129
{"d": pd.Timestamp("2010-10-01T02:30:00")}
127130
)
128131

129132

130133
@requires_numpy_and_pandas
131-
def test_serializes_pandas_series():
132-
assert b'{"d":["a","b","c","d"]}' == JSONSerializer().dumps(
134+
def test_serializes_pandas_series(json_serializer):
135+
assert b'{"d":["a","b","c","d"]}' == json_serializer.dumps(
133136
{"d": pd.Series(["a", "b", "c", "d"])}
134137
)
135138

136139

137140
@requires_numpy_and_pandas
138141
@pytest.mark.skipif(not hasattr(pd, "NA"), reason="pandas.NA is required")
139-
def test_serializes_pandas_na():
140-
assert b'{"d":null}' == JSONSerializer().dumps({"d": pd.NA})
142+
def test_serializes_pandas_na(json_serializer):
143+
assert b'{"d":null}' == json_serializer.dumps({"d": pd.NA})
141144

142145

143146
@requires_numpy_and_pandas
144147
@pytest.mark.skipif(not hasattr(pd, "NaT"), reason="pandas.NaT required")
145-
def test_raises_serialization_error_pandas_nat():
148+
def test_raises_serialization_error_pandas_nat(json_serializer):
146149
with pytest.raises(SerializationError):
147-
JSONSerializer().dumps({"d": pd.NaT})
150+
json_serializer.dumps({"d": pd.NaT})
148151

149152

150153
@requires_numpy_and_pandas
151-
def test_serializes_pandas_category():
154+
def test_serializes_pandas_category(json_serializer):
152155
cat = pd.Categorical(["a", "c", "b", "a"], categories=["a", "b", "c"])
153-
assert b'{"d":["a","c","b","a"]}' == JSONSerializer().dumps({"d": cat})
156+
assert b'{"d":["a","c","b","a"]}' == json_serializer.dumps({"d": cat})
154157

155158
cat = pd.Categorical([1, 2, 3], categories=[1, 2, 3])
156-
assert b'{"d":[1,2,3]}' == JSONSerializer().dumps({"d": cat})
159+
assert b'{"d":[1,2,3]}' == json_serializer.dumps({"d": cat})
157160

158161

159-
def test_json_raises_serialization_error_on_dump_error():
162+
def test_json_raises_serialization_error_on_dump_error(json_serializer):
160163
with pytest.raises(SerializationError):
161-
JSONSerializer().dumps(object())
164+
json_serializer.dumps(object())
162165

163166

164-
def test_raises_serialization_error_on_load_error():
167+
def test_raises_serialization_error_on_load_error(json_serializer):
165168
with pytest.raises(SerializationError):
166-
JSONSerializer().loads(object())
169+
json_serializer.loads(object())
167170
with pytest.raises(SerializationError):
168-
JSONSerializer().loads("")
171+
json_serializer.loads("")
169172
with pytest.raises(SerializationError):
170-
JSONSerializer().loads("{{")
173+
json_serializer.loads("{{")
171174

172175

173176
def test_strings_are_left_untouched():

0 commit comments

Comments
 (0)