Commit 6ff1ed9

Merge pull request #693 from pytorch/fix_perf_quantization
Update dynamic quantization tutorial
2 parents: 6da2fd3 + bdc5212

File tree

1 file changed: +8 -0 lines changed

advanced_source/dynamic_quantization_tutorial.py

Lines changed: 8 additions & 0 deletions
@@ -269,6 +269,11 @@ def print_size_of_model(model):
 
 ######################################################################
 # Second, we see faster inference time, with no difference in evaluation loss:
+#
+# Note: we set the number of threads to one for single threaded comparison, since quantized
+# models run single threaded.
+
+torch.set_num_threads(1)
 
 def time_model_evaluation(model, test_data):
     s = time.time()
@@ -280,6 +285,9 @@ def time_model_evaluation(model, test_data):
 time_model_evaluation(quantized_model, test_data)
 
 ######################################################################
+# Running this locally on a MacBook Pro, without quantization, inference takes about 200 seconds,
+# and with quantization it takes just about 100 seconds.
+#
 # Conclusion
 # ----------
 #
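The pattern the commit adds can be sketched in isolation. This is a minimal, self-contained example (not the tutorial's LSTM language model; the small `nn.Sequential` network, tensor sizes, and iteration count are illustrative assumptions): pin inference to a single thread, dynamically quantize the float model, and time both under the same conditions.

```python
import time
import torch
import torch.nn as nn

# Quantized models run single threaded, so pin the FP32 model to one
# thread as well for a fair comparison (the point of this commit).
torch.set_num_threads(1)

# Hypothetical stand-in model; the tutorial uses an LSTM language model.
model = nn.Sequential(nn.Linear(256, 256), nn.ReLU(), nn.Linear(256, 10))

# Dynamic quantization: weights are stored as int8 and activations are
# quantized on the fly at inference time.
quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)

def time_model_evaluation(model, data, iters=50):
    """Return wall-clock seconds for `iters` forward passes."""
    s = time.time()
    with torch.no_grad():
        for _ in range(iters):
            model(data)
    return time.time() - s

data = torch.randn(32, 256)
fp32_s = time_model_evaluation(model, data)
int8_s = time_model_evaluation(quantized_model, data)
print(f"fp32: {fp32_s:.3f}s  int8: {int8_s:.3f}s")
```

Without `torch.set_num_threads(1)`, the FP32 model would use all available cores while the quantized kernels stay single threaded, which would understate the speedup from quantization.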

0 commit comments
