Merge branch 'main' into resolve-2332

sekyondaMeta · web-flow · commit 90e56eae6b9b · 2023-06-02T10:17:47.000-04:00
diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py
@@ -149,6 +149,9 @@ def forward(self, x):
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     size = len(dataloader.dataset)
+    # Set the model to training mode - important for batch normalization and dropout layers
+    # Unnecessary in this situation but added for best practices
+    model.train()
     for batch, (X, y) in enumerate(dataloader):
         # Compute prediction and loss
         pred = model(X)
@@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer):
 
 
 def test_loop(dataloader, model, loss_fn):
+    # Set the model to evaluation mode - important for batch normalization and dropout layers
+    # Unnecessary in this situation but added for best practices
+    model.eval()
     size = len(dataloader.dataset)
     num_batches = len(dataloader)
     test_loss, correct = 0, 0
 
+    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
+    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
     with torch.no_grad():
         for X, y in dataloader:
             pred = model(X)
diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py
@@ -288,7 +288,7 @@ def num_flat_features(self, x):
 
 transform = transforms.Compose(
     [transforms.ToTensor(),
-     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
 
 
 ##########################################################################
@@ -297,9 +297,28 @@ def num_flat_features(self, x):
 # -  ``transforms.ToTensor()`` converts images loaded by Pillow into 
 #    PyTorch tensors.
 # -  ``transforms.Normalize()`` adjusts the values of the tensor so
-#    that their average is zero and their standard deviation is 0.5. Most
+#    that their average is zero and their standard deviation is 1.0. Most
 #    activation functions have their strongest gradients around x = 0, so
 #    centering our data there can speed learning.
+#    The values passed to the transform are the means (first tuple) and the
+#    standard deviations (second tuple) of the rgb values of the images in
+#    the dataset. You can calculate these values yourself by running these
+#    few lines of code:
+#          ```
+#           from torch.utils.data import ConcatDataset
+#           transform = transforms.Compose([transforms.ToTensor()])
+#           trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+#                                        download=True, transform=transform)
+#
+#           #stack all train images together into a tensor of shape 
+#           #(50000, 3, 32, 32)
+#           x = torch.stack([sample[0] for sample in ConcatDataset([trainset])])
+#           
+#           #get the mean of each channel            
+#           mean = torch.mean(x, dim=(0,2,3)) #tensor([0.4914, 0.4822, 0.4465])
+#           std = torch.std(x, dim=(0,2,3)) #tensor([0.2470, 0.2435, 0.2616])  
+# 
+#          ```   
 # 
 # There are many more transforms available, including cropping, centering,
 # rotation, and reflection.
diff --git a/prototype_source/fx_graph_mode_ptq_static.rst b/prototype_source/fx_graph_mode_ptq_static.rst
@@ -214,9 +214,9 @@ Download the `torchvision resnet18 model <https://download.pytorch.org/models/re
     float_model = load_model(saved_model_dir + float_model_file).to("cpu")
     float_model.eval()
 
-    # deepcopy the model since we need to keep the original model around
-    import copy
-    model_to_quantize = copy.deepcopy(float_model)
+    # create another instance of the model since
+    # we need to keep the original model around
+    model_to_quantize = load_model(saved_model_dir + float_model_file).to("cpu")
 
 3. Set model to eval mode
 -------------------------
@@ -408,4 +408,4 @@ Running the model in AIBench (with single threading) gives the following result:
 
 As we can see for resnet18 both FX graph mode and eager mode quantized model get similar speedup over the floating point model,
 which is around 2-4x faster than the floating point model. But the actual speedup over floating point model may vary
-depending on model, device, build, input batch sizes, threading etc.
+depending on model, device, build, input batch sizes, threading etc.