pytorch · malfet · Jan 30, 2025 · Jan 24, 2025 · Jan 26, 2025 · Jan 28, 2025
diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
@@ -150,7 +150,7 @@ def make_model(in_size, out_size, num_layers):
 # The same ``GradScaler`` instance should be used for the entire convergence run.
 # If you perform multiple convergence runs in the same script, each run should use
 # a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight.
-scaler = torch.cuda.amp.GradScaler()
+scaler = torch.GradScaler("cuda")
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
@@ -182,7 +182,7 @@ def make_model(in_size, out_size, num_layers):
 
 net = make_model(in_size, out_size, num_layers)
 opt = torch.optim.SGD(net.parameters(), lr=0.001)
-scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
+scaler = torch.GradScaler("cuda", enabled=use_amp)
 
 start_timer()
 for epoch in range(epochs):