From 3c4a7269de20c211e77f9e85b75f7bf9f145e6d6 Mon Sep 17 00:00:00 2001
From: zabboud
Date: Thu, 1 Jun 2023 14:11:55 -0400
Subject: [PATCH] Fixes #2083 - explain model.eval, torch.no_grad

---
 beginner_source/basics/optimization_tutorial.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py
index 0fb508d1ccc..a1603510b96 100644
--- a/beginner_source/basics/optimization_tutorial.py
+++ b/beginner_source/basics/optimization_tutorial.py
@@ -149,6 +149,9 @@ def forward(self, x):
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     size = len(dataloader.dataset)
+    # Set the model to training mode - important for batch normalization and dropout layers.
+    # Unnecessary in this situation but added for best practices.
+    model.train()
     for batch, (X, y) in enumerate(dataloader):
         # Compute prediction and loss
         pred = model(X)
@@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer):
 
 
 def test_loop(dataloader, model, loss_fn):
+    # Set the model to evaluation mode - important for batch normalization and dropout layers.
+    # Unnecessary in this situation but added for best practices.
+    model.eval()
     size = len(dataloader.dataset)
     num_batches = len(dataloader)
     test_loss, correct = 0, 0
 
+    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode.
+    # It also reduces unnecessary gradient computations and memory usage for tensors with requires_grad=True.
     with torch.no_grad():
         for X, y in dataloader:
             pred = model(X)
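
---

A standalone sketch (not part of the patch) illustrating the behavior the new comments describe: model.train() and model.eval() toggle mode-dependent layers such as dropout, while torch.no_grad() disables autograd tracking. The small two-layer network and tensor shapes below are hypothetical, chosen only for illustration.

    import torch
    from torch import nn

    # Hypothetical toy network; nn.Dropout makes the train/eval difference visible.
    model = nn.Sequential(nn.Linear(4, 8), nn.Dropout(p=0.5), nn.Linear(8, 2))
    x = torch.randn(1, 4)

    model.train()  # training mode: dropout randomly zeroes activations
    print(torch.equal(model(x), model(x)))  # almost always False - outputs vary run to run

    model.eval()   # evaluation mode: dropout is a no-op, so outputs are deterministic
    print(torch.equal(model(x), model(x)))  # True

    with torch.no_grad():  # autograd records no graph inside this block
        out = model(x)
    print(out.requires_grad)  # False - no gradients can flow back from out

Note that the two calls are complementary rather than redundant: model.eval() changes what the forward pass computes, while torch.no_grad() changes whether that computation is recorded for backpropagation, which is why the patch adds both to test_loop.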