From bdc5212304939a992691f3f7e62f4285e54c3f01 Mon Sep 17 00:00:00 2001 From: Seth Weidman Date: Thu, 10 Oct 2019 13:28:28 -0700 Subject: [PATCH] Update dynamic quantization tutorial --- advanced_source/dynamic_quantization_tutorial.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index b26dc079cb8..f0b36eb9dd8 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -269,6 +269,11 @@ def print_size_of_model(model): ###################################################################### # Second, we see faster inference time, with no difference in evaluation loss: +# +# Note: we set the number of threads to one for single threaded comparison, since quantized +# models run single threaded. + +torch.set_num_threads(1) def time_model_evaluation(model, test_data): s = time.time() @@ -280,6 +285,9 @@ def time_model_evaluation(model, test_data): time_model_evaluation(quantized_model, test_data) ###################################################################### +# Running this locally on a MacBook Pro, without quantization, inference takes about 200 seconds, +# and with quantization it takes just about 100 seconds. +# # Conclusion # ---------- #