From 85c31bada1a409df296df1ab02d8c0ab865a520f Mon Sep 17 00:00:00 2001
From: Usama Ahmed <53372259+0ssamaak0@users.noreply.github.com>
Date: Fri, 24 Jan 2025 16:34:13 +0300
Subject: [PATCH 1/2] change torch.cuda.amp.GradScaler to torch.GradScaler("cuda")

---
 recipes_source/recipes/amp_recipe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index b8a4d942333..421fb167a6c 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -150,7 +150,7 @@ def make_model(in_size, out_size, num_layers):
 # The same ``GradScaler`` instance should be used for the entire convergence run.
 # If you perform multiple convergence runs in the same script, each run should use
 # a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight.
-scaler = torch.cuda.amp.GradScaler()
+scaler = torch.GradScaler("cuda")
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
@@ -182,7 +182,7 @@ def make_model(in_size, out_size, num_layers):
 
 net = make_model(in_size, out_size, num_layers)
 opt = torch.optim.SGD(net.parameters(), lr=0.001)
-scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
+scaler = torch.GradScaler("cuda" ,enabled=use_amp)
 
 start_timer()
 for epoch in range(epochs):

From 0c71332909c005d0dcb12648b66689c6528ccc5a Mon Sep 17 00:00:00 2001
From: Usama Ahmed <53372259+0ssamaak0@users.noreply.github.com>
Date: Mon, 27 Jan 2025 00:56:42 +0300
Subject: [PATCH 2/2] changing torch.GradScaler to torch.amp.GradScaler

---
 recipes_source/recipes/amp_recipe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 421fb167a6c..91ce19a93a9 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -150,7 +150,7 @@ def make_model(in_size, out_size, num_layers):
 # The same ``GradScaler`` instance should be used for the entire convergence run.
 # If you perform multiple convergence runs in the same script, each run should use
 # a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight.
-scaler = torch.GradScaler("cuda")
+scaler = torch.amp.GradScaler("cuda")
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
@@ -182,7 +182,7 @@ def make_model(in_size, out_size, num_layers):
 
 net = make_model(in_size, out_size, num_layers)
 opt = torch.optim.SGD(net.parameters(), lr=0.001)
-scaler = torch.GradScaler("cuda" ,enabled=use_amp)
+scaler = torch.amp.GradScaler("cuda" ,enabled=use_amp)
 
 start_timer()
 for epoch in range(epochs):
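
Note (not part of the patch): below is a minimal standalone sketch of how the updated torch.amp.GradScaler("cuda") constructor fits into the recipe's AMP training loop. The toy model, data, loss function, and loop shown here are illustrative assumptions; the tutorial builds its own net, opt, data, and targets via make_model() and its data setup.

import torch

# Hypothetical toy setup standing in for the recipe's make_model()/data pipeline.
device = "cuda"
net = torch.nn.Linear(512, 512).to(device)
opt = torch.optim.SGD(net.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()
data = [torch.randn(64, 512, device=device) for _ in range(4)]
targets = [torch.randn(64, 512, device=device) for _ in range(4)]
use_amp = True

# New-style constructor used by this patch: torch.amp.GradScaler("cuda")
# replaces the deprecated torch.cuda.amp.GradScaler().
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

for input, target in zip(data, targets):
    with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=use_amp):
        output = net(input)
        loss = loss_fn(output, target)
    scaler.scale(loss).backward()   # scale the loss to avoid fp16 gradient underflow
    scaler.step(opt)                # unscale grads, then opt.step() if no inf/nan found
    scaler.update()                 # adjust the scale factor for the next iteration
    opt.zero_grad(set_to_none=True)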