From fcf2a5b4fbe7535b632084eff6365cc2f87303ce Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 6 Nov 2020 19:31:27 +0100 Subject: [PATCH 01/10] accelerate plotly JSON encoder for numpy arrays without nans --- packages/python/plotly/_plotly_utils/utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index cbf8d3a6b98..ee35a273e6e 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -40,10 +40,11 @@ def encode(self, o): Note that setting invalid separators will cause a failure at this step. """ - # this will raise errors in a normal-expected way + self.unsafe = False encoded_o = super(PlotlyJSONEncoder, self).encode(o) - + if self.unsafe: + return encoded_o # now: # 1. `loads` to switch Infinity, -Infinity, NaN to None # 2. `dumps` again so you get 'null' instead of extended JSON @@ -95,6 +96,12 @@ def default(self, obj): Therefore, we only anticipate either unknown iterables or values here. """ + numpy = get_module("numpy", should_load=False) + if numpy: + if isinstance(obj, numpy.ndarray): + if numpy.all(numpy.isfinite(obj)): + self.unsafe = True + # TODO: The ordering if these methods is *very* important. Is this OK? 
encoding_methods = ( self.encode_as_plotly, From 0fa810fe6e4740730787da4276f37df526b9f87a Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 6 Nov 2020 21:34:00 +0100 Subject: [PATCH 02/10] limit change to numerical data types --- packages/python/plotly/_plotly_utils/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index ee35a273e6e..d975169d1e9 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -98,7 +98,9 @@ def default(self, obj): """ numpy = get_module("numpy", should_load=False) if numpy: - if isinstance(obj, numpy.ndarray): + if isinstance(obj, numpy.ndarray) and numpy.issubdtype( + obj.dtype, numpy.number + ): if numpy.all(numpy.isfinite(obj)): self.unsafe = True From 561edf17e58dc5a2405763b6c114a9e1be8094f8 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 13 Nov 2020 22:44:11 +0100 Subject: [PATCH 03/10] solution that works for 99% of cases but lists with composite types won't work --- packages/python/plotly/_plotly_utils/utils.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index d975169d1e9..dd2af7cea76 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -41,9 +41,9 @@ def encode(self, o): """ # this will raise errors in a normal-expected way - self.unsafe = False + self.hasinfnans = False encoded_o = super(PlotlyJSONEncoder, self).encode(o) - if self.unsafe: + if not self.hasinfnans: return encoded_o # now: # 1. 
`loads` to switch Infinity, -Infinity, NaN to None @@ -97,13 +97,20 @@ def default(self, obj): """ numpy = get_module("numpy", should_load=False) + # Try to detect any nans of infs by aggressively converting to numpy + # (catching any errors resulting from this conversion) + # and checking with np.isfinite if numpy: - if isinstance(obj, numpy.ndarray) and numpy.issubdtype( - obj.dtype, numpy.number - ): - if numpy.all(numpy.isfinite(obj)): - self.unsafe = True - + try: + obj_as_numpy = numpy.asanyarray(obj) + try: + is_finite = numpy.all(numpy.isfinite(obj_as_numpy)) + if not is_finite: + self.hasinfnans = True + except TypeError: + pass + except: + pass # TODO: The ordering if these methods is *very* important. Is this OK? encoding_methods = ( self.encode_as_plotly, From 8392ead692e7b9934c7ee8b89943eb9012df4669 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 13 Nov 2020 23:10:20 +0100 Subject: [PATCH 04/10] other method: brute force string matching of Infinity or NaN --- packages/python/plotly/_plotly_utils/utils.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index dd2af7cea76..b4fcfd8afd8 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -43,7 +43,11 @@ def encode(self, o): # this will raise errors in a normal-expected way self.hasinfnans = False encoded_o = super(PlotlyJSONEncoder, self).encode(o) - if not self.hasinfnans: + # Brute force guessing whether NaN or Infinity values are in the string + # We catch false positive cases (e.g. strings such as titles, labels etc.) + # but this is ok since the intention is to skip the decoding / reencoding + # step when it's completely safe + if not ('Infinity' in encoded_o or 'NaN' in encoded_o): return encoded_o # now: # 1. 
`loads` to switch Infinity, -Infinity, NaN to None @@ -96,21 +100,6 @@ def default(self, obj): Therefore, we only anticipate either unknown iterables or values here. """ - numpy = get_module("numpy", should_load=False) - # Try to detect any nans of infs by aggressively converting to numpy - # (catching any errors resulting from this conversion) - # and checking with np.isfinite - if numpy: - try: - obj_as_numpy = numpy.asanyarray(obj) - try: - is_finite = numpy.all(numpy.isfinite(obj_as_numpy)) - if not is_finite: - self.hasinfnans = True - except TypeError: - pass - except: - pass # TODO: The ordering if these methods is *very* important. Is this OK? encoding_methods = ( self.encode_as_plotly, From 627c00fd4d935070321aa1e6304c572c95b59eab Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Fri, 13 Nov 2020 23:13:53 +0100 Subject: [PATCH 05/10] black --- packages/python/plotly/_plotly_utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index b4fcfd8afd8..875008458a8 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -47,7 +47,7 @@ def encode(self, o): # We catch false positive cases (e.g. strings such as titles, labels etc.) # but this is ok since the intention is to skip the decoding / reencoding # step when it's completely safe - if not ('Infinity' in encoded_o or 'NaN' in encoded_o): + if not ("Infinity" in encoded_o or "NaN" in encoded_o): return encoded_o # now: # 1. 
`loads` to switch Infinity, -Infinity, NaN to None From c8e2922e6fc69bea314dc5f331b4712ff8689823 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sun, 15 Nov 2020 22:00:40 +0100 Subject: [PATCH 06/10] added test --- .../tests/test_core/test_utils/test_utils.py | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py b/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py index a3732d85256..5da3ed5cf5f 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py +++ b/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py @@ -1,11 +1,13 @@ from __future__ import absolute_import -from inspect import getargspec from unittest import TestCase import json as _json from plotly.utils import PlotlyJSONEncoder, get_by_path, node_generator +from time import time +import numpy as np +import plotly.graph_objects as go class TestJSONEncoder(TestCase): @@ -19,6 +21,38 @@ def test_invalid_encode_exception(self): with self.assertRaises(TypeError): _json.dumps({"a": {1}}, cls=PlotlyJSONEncoder) + def test_fast_track_finite_arrays(self): + # if NaN or Infinity is found in the json dump + # of a figure, it is decoded and re-encoded to replace these values + # with null. This test checks that NaN and Infinity values are + # indeed converted to null, and that the encoding of figures + # without inf or nan is faster (because we can avoid decoding + # and reencoding). + z = np.random.randn(100, 100) + x = np.arange(100.) 
+ fig_1 = go.Figure(go.Heatmap(z=z, x=x)) + t1 = time() + json_str_1 = _json.dumps(fig_1, cls=PlotlyJSONEncoder) + t2 = time() + x[0] = np.nan + x[1] = np.inf + fig_2 = go.Figure(go.Heatmap(z=z, x=x)) + t3 = time() + json_str_2 = _json.dumps(fig_2, cls=PlotlyJSONEncoder) + t4 = time() + assert(t2 - t1 < t4 - t3) + assert 'null' in json_str_2 + assert 'NaN' not in json_str_2 + assert 'Infinity' not in json_str_2 + x = np.arange(100.) + fig_3 = go.Figure(go.Heatmap(z=z, x=x)) + fig_3.update_layout(title_text='Infinity') + t5 = time() + json_str_3 = _json.dumps(fig_3, cls=PlotlyJSONEncoder) + t6 = time() + assert(t2 - t1 < t6 - t5) + assert 'Infinity' in json_str_3 + class TestGetByPath(TestCase): def test_get_by_path(self): From 224082ec8f5d7176a83fbe7b414a0470cd8e1b30 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Sun, 15 Nov 2020 22:10:34 +0100 Subject: [PATCH 07/10] black --- .../tests/test_core/test_utils/test_utils.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py b/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py index 5da3ed5cf5f..6122f27e7ee 100644 --- a/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py +++ b/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py @@ -29,7 +29,7 @@ def test_fast_track_finite_arrays(self): # without inf or nan is faster (because we can avoid decoding # and reencoding). z = np.random.randn(100, 100) - x = np.arange(100.) + x = np.arange(100.0) fig_1 = go.Figure(go.Heatmap(z=z, x=x)) t1 = time() json_str_1 = _json.dumps(fig_1, cls=PlotlyJSONEncoder) @@ -40,18 +40,18 @@ def test_fast_track_finite_arrays(self): t3 = time() json_str_2 = _json.dumps(fig_2, cls=PlotlyJSONEncoder) t4 = time() - assert(t2 - t1 < t4 - t3) - assert 'null' in json_str_2 - assert 'NaN' not in json_str_2 - assert 'Infinity' not in json_str_2 - x = np.arange(100.) 
+ assert t2 - t1 < t4 - t3 + assert "null" in json_str_2 + assert "NaN" not in json_str_2 + assert "Infinity" not in json_str_2 + x = np.arange(100.0) fig_3 = go.Figure(go.Heatmap(z=z, x=x)) - fig_3.update_layout(title_text='Infinity') + fig_3.update_layout(title_text="Infinity") t5 = time() json_str_3 = _json.dumps(fig_3, cls=PlotlyJSONEncoder) t6 = time() - assert(t2 - t1 < t6 - t5) - assert 'Infinity' in json_str_3 + assert t2 - t1 < t6 - t5 + assert "Infinity" in json_str_3 class TestGetByPath(TestCase): From 5b18b2d7b7e9a842a6ac73c897b56f690f47b139 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 16 Nov 2020 13:25:13 +0100 Subject: [PATCH 08/10] removed ununsed variable --- packages/python/plotly/_plotly_utils/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index 875008458a8..d4f79bd7dff 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -41,7 +41,6 @@ def encode(self, o): """ # this will raise errors in a normal-expected way - self.hasinfnans = False encoded_o = super(PlotlyJSONEncoder, self).encode(o) # Brute force guessing whether NaN or Infinity values are in the string # We catch false positive cases (e.g. strings such as titles, labels etc.) From e5282714bb1989319057a3651acaa20df6c3fed2 Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Mon, 16 Nov 2020 13:31:59 +0100 Subject: [PATCH 09/10] changelog entry --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60cda725b32..d6153b9d6ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,11 @@ This project adheres to [Semantic Versioning](http://semver.org/). ### Updated +- The JSON serialization of plotly figures has been accelerated by handling + figures with and without NaN and Inf values differently ([#2880](https://github.com/plotly/plotly.py/pull/2880)). 
+ +### Updated + - Updated Plotly.js to version 1.55.2. See the [plotly.js CHANGELOG](https://github.com/plotly/plotly.js/blob/v1.55.2/CHANGELOG.md) for more information. These changes are reflected in the auto-generated `plotly.graph_objects` module. - `px.imshow` has a new `binary_string` boolean argument, which passes the image data as a b64 binary string when True. Using binary strings allow for From c46bae010d151d11299838c4dcd851aee06c6c0a Mon Sep 17 00:00:00 2001 From: Emmanuelle Gouillart Date: Tue, 17 Nov 2020 13:26:42 +0100 Subject: [PATCH 10/10] NaN tested before Infinity --- packages/python/plotly/_plotly_utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py index d4f79bd7dff..16b74b098f7 100644 --- a/packages/python/plotly/_plotly_utils/utils.py +++ b/packages/python/plotly/_plotly_utils/utils.py @@ -46,7 +46,7 @@ def encode(self, o): # We catch false positive cases (e.g. strings such as titles, labels etc.) # but this is ok since the intention is to skip the decoding / reencoding # step when it's completely safe - if not ("Infinity" in encoded_o or "NaN" in encoded_o): + if not ("NaN" in encoded_o or "Infinity" in encoded_o): return encoded_o # now: # 1. `loads` to switch Infinity, -Infinity, NaN to None