From 8c85513ca457c881fee497397461469c7ec3936b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=8C=97=E6=B5=B7=E8=8B=A5?=
Date: Sun, 5 Mar 2023 14:11:56 +0800
Subject: [PATCH 1/2] Correct occurrences of `autocast` in `amp` recipe

Replace the incorrect occurrences of `torch.cuda.amp.autocast` with
`torch.autocast`.
---
 recipes_source/recipes/amp_recipe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 86b278d351e..d6281b3b7f4 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -11,7 +11,7 @@
 range of ``float32``. Mixed precision tries to match each op to its appropriate datatype,
 which can reduce your network's runtime and memory footprint.
 
-Ordinarily, "automatic mixed precision training" uses `torch.autocast `_ and
+Ordinarily, "automatic mixed precision training" uses `torch.autocast `_ and
 `torch.cuda.amp.GradScaler `_ together.
 
 This recipe measures the performance of a simple network in default precision,
@@ -105,7 +105,7 @@ def make_model(in_size, out_size, num_layers):
 ##########################################################
 # Adding autocast
 # ---------------
-# Instances of `torch.cuda.amp.autocast `_
+# Instances of `torch.autocast `_
 # serve as context managers that allow regions of your script to run in mixed precision.
 #
 # In these regions, CUDA ops run in a dtype chosen by autocast
@@ -310,7 +310,7 @@ def make_model(in_size, out_size, num_layers):
 # 1. Disable ``autocast`` or ``GradScaler`` individually (by passing ``enabled=False`` to their constructor) and see if infs/NaNs persist.
 # 2. If you suspect part of your network (e.g., a complicated loss function) overflows , run that forward region in ``float32``
 #    and see if infs/NaNs persist.
-#    `The autocast docstring `_'s last code snippet
+#    `The autocast docstring `_'s last code snippet
 #    shows forcing a subregion to run in ``float32`` (by locally disabling autocast and casting the subregion's inputs).
 #
 # Type mismatch error (may manifest as CUDNN_STATUS_BAD_PARAM)

From 1cbbf795aa131e8f2ef5c1bf54281a9aae23f577 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu
Date: Mon, 6 Mar 2023 08:27:17 -0800
Subject: [PATCH 2/2] Update amp_recipe.py

---
 recipes_source/recipes/amp_recipe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index d6281b3b7f4..eb92b90d0cc 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -19,7 +19,7 @@
 mixed precision with improved performance.
 
 You may download and run this recipe as a standalone Python script.
-The only requirements are Pytorch 1.6+ and a CUDA-capable GPU.
+The only requirements are PyTorch 1.6 or later and a CUDA-capable GPU.
 
 Mixed precision primarily benefits Tensor Core-enabled architectures (Volta, Turing, Ampere).
 This recipe should show significant (2-3X) speedup on those architectures.
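
Note (illustrative, not part of the patch itself): the first hunk corrects the link to ``torch.autocast``, which the recipe pairs with ``torch.cuda.amp.GradScaler``. A minimal sketch of that pairing is below; the names ``net``, ``opt``, ``loss_fn`` and the random tensors are placeholders rather than the recipe's own code, and a CUDA-capable GPU is assumed.

    import torch

    # Placeholder model, optimizer, loss, and data; a CUDA-capable GPU is assumed.
    device = "cuda"
    net = torch.nn.Linear(64, 64).to(device)
    opt = torch.optim.SGD(net.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    scaler = torch.cuda.amp.GradScaler()

    inputs = torch.randn(8, 64, device=device)
    targets = torch.randn(8, 64, device=device)

    for _ in range(3):
        opt.zero_grad(set_to_none=True)
        # Forward pass under autocast: eligible CUDA ops run in a lower-precision dtype.
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            output = net(inputs)
            loss = loss_fn(output, targets)
        # GradScaler scales the loss to avoid gradient underflow, then unscales
        # before the optimizer step and updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()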
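
Also not part of the patch: the troubleshooting hunk mentions forcing a suspect subregion to run in ``float32`` by locally disabling autocast and casting the subregion's inputs. A small sketch of that idea, with a hypothetical ``tricky_loss`` standing in for a numerically sensitive function:

    import torch

    def tricky_loss(pred, target):
        # Hypothetical stand-in for a loss that may overflow in float16.
        return ((pred - target) ** 2).mean()

    device = "cuda"
    pred = torch.randn(8, 64, device=device)
    target = torch.randn(8, 64, device=device)

    with torch.autocast(device_type="cuda", dtype=torch.float16):
        # ... surrounding ops may run in float16 here ...
        with torch.autocast(device_type="cuda", enabled=False):
            # Locally disable autocast and cast the subregion's inputs to float32.
            loss = tricky_loss(pred.float(), target.float())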