diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py index 0fb508d1ccc..a1603510b96 100644 --- a/beginner_source/basics/optimization_tutorial.py +++ b/beginner_source/basics/optimization_tutorial.py @@ -149,6 +149,9 @@ def forward(self, x): def train_loop(dataloader, model, loss_fn, optimizer): size = len(dataloader.dataset) + # Set the model to training mode - important for batch normalization and dropout layers + # Unnecessary in this situation, but added as a best practice + model.train() for batch, (X, y) in enumerate(dataloader): # Compute prediction and loss pred = model(X) @@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer): def test_loop(dataloader, model, loss_fn): + # Set the model to evaluation mode - important for batch normalization and dropout layers + # Unnecessary in this situation, but added as a best practice + model.eval() size = len(dataloader.dataset) num_batches = len(dataloader) test_loss, correct = 0, 0 + # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode. + # It also reduces unnecessary gradient computations and memory usage for tensors with requires_grad=True. with torch.no_grad(): for X, y in dataloader: pred = model(X)