
Commit 3e9815a

touchups
1 parent a824b85 commit 3e9815a


2 files changed: +11, -9 lines changed


advanced_source/dispatcher.rst

Lines changed: 1 addition & 1 deletion
@@ -302,7 +302,7 @@ the safest choice for the execution type:
 }

 If your custom op is :ref:`autograd-enabled<autograd-support>`, you only need to write and register
-an autocast wrapper for same name onto which the autograd wrapper is registered.
+an autocast wrapper for the same name onto which the autograd wrapper is registered.
 For example, if you wanted an autocast wrapper for the ``myadd`` function shown
 in the autograd section, all you'd need is

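For readers skimming this diff, a minimal Python sketch of what such an autocast wrapper does conceptually; the tutorial's real wrapper is registered in C++ through the dispatcher, and the names `myadd`/`myadd_autocast` simply follow the tutorial's example op:

    import torch

    def myadd(a, b):
        # Stand-in for the custom op from the autograd section of the tutorial.
        return a + b

    def myadd_autocast(a, b):
        # An autocast wrapper casts the inputs to the autocast dtype (float16
        # here, for illustration), then runs the underlying op with autocasting
        # disabled so the op itself is not re-dispatched through autocast.
        with torch.autocast(device_type="cuda", enabled=False):
            return myadd(a.to(torch.float16), b.to(torch.float16))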

recipes_source/recipes/amp_recipe.py

Lines changed: 10 additions & 8 deletions
@@ -166,7 +166,7 @@ def make_model(in_size, out_size, num_layers):
         # Updates the scale for next iteration.
         scaler.update()

-        opt.zero_grad()
+        opt.zero_grad() # set_to_none=True here can modestly improve performance

 ##########################################################
 # All together: "Automatic Mixed Precision"
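The change above (repeated in the two hunks below) only adds a comment; here is a self-contained sketch of the surrounding training step, with illustrative stand-ins for the recipe's net, opt, loss_fn, and data, showing how `set_to_none=True` would actually be passed:

    import torch

    # Illustrative stand-ins for the recipe's net / opt / loss_fn / data.
    net = torch.nn.Linear(64, 64).cuda()
    opt = torch.optim.SGD(net.parameters(), lr=0.1)
    loss_fn = torch.nn.MSELoss()
    scaler = torch.cuda.amp.GradScaler()
    data = [torch.randn(32, 64, device="cuda") for _ in range(4)]
    targets = [torch.randn(32, 64, device="cuda") for _ in range(4)]

    for input, target in zip(data, targets):
        with torch.cuda.amp.autocast():
            output = net(input)
            loss = loss_fn(output, target)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
        # set_to_none=True drops the .grad tensors instead of zero-filling
        # them, which skips a per-parameter fill kernel and can modestly
        # improve performance.
        opt.zero_grad(set_to_none=True)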
@@ -190,7 +190,7 @@ def make_model(in_size, out_size, num_layers):
         scaler.scale(loss).backward()
         scaler.step(opt)
         scaler.update()
-        opt.zero_grad()
+        opt.zero_grad() # set_to_none=True here can modestly improve performance
 end_timer_and_print("Mixed precision:")

 ##########################################################
@@ -216,7 +216,7 @@ def make_model(in_size, out_size, num_layers):

         scaler.step(opt)
         scaler.update()
-        opt.zero_grad()
+        opt.zero_grad() # set_to_none=True here can modestly improve performance

 ##########################################################
 # Saving/Resuming
@@ -232,14 +232,16 @@ def make_model(in_size, out_size, num_layers):
 checkpoint = {"model": net.state_dict(),
               "optimizer": opt.state_dict(),
               "scaler": scaler.state_dict()}
+# Write checkpoint as desired, e.g.,
+# torch.save(checkpoint, "filename")

 ##########################################################
-# (write checkpoint as desired, e.g., ``torch.save(checkpoint, "filename")``.)
-#
 # When resuming, load the scaler state dict alongside the model and optimizer state dicts.
-# (read checkpoint as desired, e.g.,
-# ``checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(torch.cuda.current_device()))``)

+# Read checkpoint as desired, e.g.,
+# dev = torch.cuda.current_device()
+# checkpoint = torch.load("filename",
+#                         map_location = lambda storage, loc: storage.cuda(dev))
 net.load_state_dict(checkpoint["model"])
 opt.load_state_dict(checkpoint["optimizer"])
 scaler.load_state_dict(checkpoint["scaler"])
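A runnable sketch of the save/resume round trip this hunk documents, using a toy model and an illustrative filename (neither is part of the recipe); the point is that the GradScaler state is saved and restored alongside the model and optimizer:

    import torch

    net = torch.nn.Linear(8, 8).cuda()
    opt = torch.optim.SGD(net.parameters(), lr=0.1)
    scaler = torch.cuda.amp.GradScaler()

    # Save model, optimizer, and scaler state together.
    checkpoint = {"model": net.state_dict(),
                  "optimizer": opt.state_dict(),
                  "scaler": scaler.state_dict()}
    torch.save(checkpoint, "amp_checkpoint.pt")  # illustrative filename

    # Resume: map storages onto the current device, then restore all three.
    dev = torch.cuda.current_device()
    checkpoint = torch.load("amp_checkpoint.pt",
                            map_location=lambda storage, loc: storage.cuda(dev))
    net.load_state_dict(checkpoint["model"])
    opt.load_state_dict(checkpoint["optimizer"])
    scaler.load_state_dict(checkpoint["scaler"])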
@@ -294,7 +296,7 @@ def make_model(in_size, out_size, num_layers):
 # 2. Your network may be GPU compute bound (lots of matmuls/convolutions) but your GPU does not have Tensor Cores.
 #    In this case a reduced speedup is expected.
 # 3. Matmul dimensions are not Tensor Core-friendly. Make sure matmuls' participating sizes are multiples of 8.
-#    (For NLP models with encoders/decoders, this can be subtle. Also. convolutions used to have similar size constraints
+#    (For NLP models with encoders/decoders, this can be subtle. Also, convolutions used to have similar size constraints
 #    for Tensor Core use, but for CuDNN versions 7.3 and later, no such constraints exist. See
 #    `here <https://github.com/NVIDIA/apex/issues/221#issuecomment-478084841>`_ for guidance.)
 #
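To illustrate the "multiples of 8" advice, a tiny helper (hypothetical, not from the recipe) that rounds a layer size up so matmul dimensions stay Tensor Core-friendly:

    def round_up_to_multiple(n, multiple=8):
        # Round n up to the next multiple of `multiple`, e.g. 50 -> 56.
        return ((n + multiple - 1) // multiple) * multiple

    # e.g. choose hidden/vocab sizes so participating matmul dims are multiples of 8
    hidden_size = round_up_to_multiple(50)      # 56
    padded_vocab = round_up_to_multiple(30522)  # 30528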
