From bdc5212304939a992691f3f7e62f4285e54c3f01 Mon Sep 17 00:00:00 2001 From: Seth Weidman Date: Thu, 10 Oct 2019 13:28:28 -0700 Subject: [PATCH] Update dynamic quantization tutorial --- advanced_source/dynamic_quantization_tutorial.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index b26dc079cb8..f0b36eb9dd8 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -269,6 +269,11 @@ def print_size_of_model(model): ###################################################################### # Second, we see faster inference time, with no difference in evaluation loss: +# +# Note: we set the number of threads to one for single threaded comparison, since quantized +# models run single threaded. + +torch.set_num_threads(1) def time_model_evaluation(model, test_data): s = time.time() @@ -280,6 +285,9 @@ def time_model_evaluation(model, test_data): time_model_evaluation(quantized_model, test_data) ###################################################################### +# Running this locally on a MacBook Pro, without quantization, inference takes about 200 seconds, +# and with quantization it takes just about 100 seconds. +# # Conclusion # ---------- #