From 85c31bada1a409df296df1ab02d8c0ab865a520f Mon Sep 17 00:00:00 2001
From: Usama Ahmed <53372259+0ssamaak0@users.noreply.github.com>
Date: Fri, 24 Jan 2025 16:34:13 +0300
Subject: [PATCH 1/2] change torch.cuda.amp.GradScaler to torch.GradScaler("cuda")

---
 recipes_source/recipes/amp_recipe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index b8a4d942333..421fb167a6c 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -150,7 +150,7 @@ def make_model(in_size, out_size, num_layers):
 # The same ``GradScaler`` instance should be used for the entire convergence run.
 # If you perform multiple convergence runs in the same script, each run should use
 # a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight.
-scaler = torch.cuda.amp.GradScaler()
+scaler = torch.GradScaler("cuda")
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
@@ -182,7 +182,7 @@ def make_model(in_size, out_size, num_layers):
 
 net = make_model(in_size, out_size, num_layers)
 opt = torch.optim.SGD(net.parameters(), lr=0.001)
-scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
+scaler = torch.GradScaler("cuda" ,enabled=use_amp)
 
 start_timer()
 for epoch in range(epochs):

From 0c71332909c005d0dcb12648b66689c6528ccc5a Mon Sep 17 00:00:00 2001
From: Usama Ahmed <53372259+0ssamaak0@users.noreply.github.com>
Date: Mon, 27 Jan 2025 00:56:42 +0300
Subject: [PATCH 2/2] changing torch.GradScaler to torch.amp.GradScaler

---
 recipes_source/recipes/amp_recipe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes_source/recipes/amp_recipe.py b/recipes_source/recipes/amp_recipe.py
index 421fb167a6c..91ce19a93a9 100644
--- a/recipes_source/recipes/amp_recipe.py
+++ b/recipes_source/recipes/amp_recipe.py
@@ -150,7 +150,7 @@ def make_model(in_size, out_size, num_layers):
 # The same ``GradScaler`` instance should be used for the entire convergence run.
 # If you perform multiple convergence runs in the same script, each run should use
 # a dedicated fresh ``GradScaler`` instance. ``GradScaler`` instances are lightweight.
-scaler = torch.GradScaler("cuda")
+scaler = torch.amp.GradScaler("cuda")
 
 for epoch in range(0): # 0 epochs, this section is for illustration only
     for input, target in zip(data, targets):
@@ -182,7 +182,7 @@ def make_model(in_size, out_size, num_layers):
 
 net = make_model(in_size, out_size, num_layers)
 opt = torch.optim.SGD(net.parameters(), lr=0.001)
-scaler = torch.GradScaler("cuda" ,enabled=use_amp)
+scaler = torch.amp.GradScaler("cuda" ,enabled=use_amp)
 
 start_timer()
 for epoch in range(epochs):
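
Note (not part of the patch): below is a minimal standalone sketch of how the updated torch.amp.GradScaler("cuda") constructor fits into the recipe's AMP training loop. The toy model, data, loss function, and loop shown here are illustrative assumptions; the tutorial builds its own net, opt, data, and targets via make_model() and its data setup.

import torch

# Hypothetical toy setup standing in for the recipe's make_model()/data pipeline.
device = "cuda"
net = torch.nn.Linear(512, 512).to(device)
opt = torch.optim.SGD(net.parameters(), lr=0.001)
loss_fn = torch.nn.MSELoss()
data = [torch.randn(64, 512, device=device) for _ in range(4)]
targets = [torch.randn(64, 512, device=device) for _ in range(4)]
use_amp = True

# New-style constructor used by this patch: torch.amp.GradScaler("cuda")
# replaces the deprecated torch.cuda.amp.GradScaler().
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

for input, target in zip(data, targets):
    with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=use_amp):
        output = net(input)
        loss = loss_fn(output, target)
    scaler.scale(loss).backward()   # scale the loss to avoid fp16 gradient underflow
    scaler.step(opt)                # unscale grads, then opt.step() if no inf/nan found
    scaler.update()                 # adjust the scale factor for the next iteration
    opt.zero_grad(set_to_none=True)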