@@ -166,7 +166,7 @@ def make_model(in_size, out_size, num_layers):
# Updates the scale for next iteration.
scaler.update()

- opt.zero_grad()
+ opt.zero_grad() # set_to_none=True here can modestly improve performance

##########################################################
# All together: "Automatic Mixed Precision"
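Note on the change above: ``zero_grad`` takes a ``set_to_none`` flag, and ``set_to_none=True`` resets gradients to ``None`` instead of zero-filling the tensors, which can modestly improve performance. A minimal sketch, with a hypothetical model and optimizer standing in for the tutorial's::

    import torch

    net = torch.nn.Linear(8, 8)                      # stand-in model
    opt = torch.optim.SGD(net.parameters(), lr=0.1)  # stand-in optimizer

    loss = net(torch.randn(4, 8)).sum()
    loss.backward()
    opt.step()
    # Grads become None rather than zeroed tensors; the next backward()
    # allocates fresh .grad tensors instead of accumulating into old ones.
    opt.zero_grad(set_to_none=True)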
@@ -190,7 +190,7 @@ def make_model(in_size, out_size, num_layers):
scaler.scale(loss).backward()
scaler.step(opt)
scaler.update()
- opt.zero_grad()
+ opt.zero_grad() # set_to_none=True here can modestly improve performance

end_timer_and_print("Mixed precision:")

##########################################################
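For context, the loop these lines come from pairs ``torch.cuda.amp.autocast`` (mixed-precision forward pass) with ``torch.cuda.amp.GradScaler`` (loss scaling). A self-contained sketch; the sizes, ``net``, ``opt``, ``loss_fn``, and data are illustrative stand-ins, not taken from this commit::

    import torch

    net = torch.nn.Linear(512, 512).cuda()
    opt = torch.optim.SGD(net.parameters(), lr=0.001)
    loss_fn = torch.nn.MSELoss().cuda()
    scaler = torch.cuda.amp.GradScaler()
    data = [torch.randn(512, 512, device="cuda") for _ in range(4)]
    targets = [torch.randn(512, 512, device="cuda") for _ in range(4)]

    for input, target in zip(data, targets):
        with torch.cuda.amp.autocast():  # eligible ops run in float16
            output = net(input)
            loss = loss_fn(output, target)
        scaler.scale(loss).backward()    # backward on the scaled loss
        scaler.step(opt)                 # unscales grads; skips step on infs/NaNs
        scaler.update()                  # adjusts the scale for the next iteration
        opt.zero_grad() # set_to_none=True here can modestly improve performance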
@@ -216,7 +216,7 @@ def make_model(in_size, out_size, num_layers):
scaler.step(opt)
scaler.update()
- opt.zero_grad()
+ opt.zero_grad() # set_to_none=True here can modestly improve performance

##########################################################
# Saving/Resuming
@@ -232,14 +232,16 @@ def make_model(in_size, out_size, num_layers):
checkpoint = {"model": net.state_dict(),
              "optimizer": opt.state_dict(),
              "scaler": scaler.state_dict()}
+ # Write checkpoint as desired, e.g.,
+ # torch.save(checkpoint, "filename")

##########################################################
- # (write checkpoint as desired, e.g., ``torch.save(checkpoint, "filename")``.)
- #
# When resuming, load the scaler state dict alongside the model and optimizer state dicts.
- # (read checkpoint as desired, e.g.,
- # ``checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(torch.cuda.current_device()))``)
+ # Read checkpoint as desired, e.g.,
+ # dev = torch.cuda.current_device()
+ # checkpoint = torch.load("filename",
+ #                         map_location = lambda storage, loc: storage.cuda(dev))
net.load_state_dict(checkpoint["model"])
opt.load_state_dict(checkpoint["optimizer"])
scaler.load_state_dict(checkpoint["scaler"])
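End to end, the save/resume flow this hunk documents looks like the sketch below; ``"filename"`` is a placeholder path, and the model and optimizer are stand-ins. ``GradScaler``'s state dict round-trips like any other state dict::

    import torch

    net = torch.nn.Linear(8, 8).cuda()
    opt = torch.optim.SGD(net.parameters(), lr=0.1)
    scaler = torch.cuda.amp.GradScaler()

    # Saving: capture all three state dicts in one checkpoint.
    checkpoint = {"model": net.state_dict(),
                  "optimizer": opt.state_dict(),
                  "scaler": scaler.state_dict()}
    torch.save(checkpoint, "filename")

    # Resuming: map storages onto the current device, then restore all three.
    dev = torch.cuda.current_device()
    checkpoint = torch.load("filename",
                            map_location=lambda storage, loc: storage.cuda(dev))
    net.load_state_dict(checkpoint["model"])
    opt.load_state_dict(checkpoint["optimizer"])
    scaler.load_state_dict(checkpoint["scaler"])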
@@ -294,7 +296,7 @@ def make_model(in_size, out_size, num_layers):
# 2. Your network may be GPU compute bound (lots of matmuls/convolutions) but your GPU does not have Tensor Cores.
#    In this case a reduced speedup is expected.
# 3. Matmul dimensions are not Tensor Core-friendly. Make sure matmuls' participating sizes are multiples of 8.
- #    (For NLP models with encoders/decoders, this can be subtle. Also. convolutions used to have similar size constraints
+ #    (For NLP models with encoders/decoders, this can be subtle. Also, convolutions used to have similar size constraints
#    for Tensor Core use, but for CuDNN versions 7.3 and later, no such constraints exist. See
#    `here <https://github.com/NVIDIA/apex/issues/221#issuecomment-478084841>`_ for guidance.)
#
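For point 3, a common remedy is to pad the offending dimension (e.g., a vocabulary or hidden size) up to the next multiple of 8. A tiny hypothetical helper, not part of the tutorial::

    def round_up_to_multiple_of_8(n):
        # Keeps matmul dimensions Tensor Core-friendly.
        return ((n + 7) // 8) * 8

    print(round_up_to_multiple_of_8(30522))  # 30528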