Skip to content

Commit ed8c29d

Browse files
authored
Merge branch 'main' into main
2 parents f453a23 + 769cff9 commit ed8c29d

10 files changed

+143
-95
lines changed

beginner_source/basics/optimization_tutorial.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ def forward(self, x):
149149

150150
def train_loop(dataloader, model, loss_fn, optimizer):
151151
size = len(dataloader.dataset)
152+
# Set the model to training mode - important for batch normalization and dropout layers
153+
# Unnecessary in this situation but added for best practices
154+
model.train()
152155
for batch, (X, y) in enumerate(dataloader):
153156
# Compute prediction and loss
154157
pred = model(X)
@@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer):
165168

166169

167170
def test_loop(dataloader, model, loss_fn):
171+
# Set the model to evaluation mode - important for batch normalization and dropout layers
172+
# Unnecessary in this situation but added for best practices
173+
model.eval()
168174
size = len(dataloader.dataset)
169175
num_batches = len(dataloader)
170176
test_loss, correct = 0, 0
171177

178+
# Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
179+
# also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
172180
with torch.no_grad():
173181
for X, y in dataloader:
174182
pred = model(X)

beginner_source/data_loading_tutorial.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,7 @@ def __getitem__(self, idx):
165165

166166
fig = plt.figure()
167167

168-
for i in range(len(face_dataset)):
169-
sample = face_dataset[i]
170-
168+
for i, sample in enumerate(face_dataset):
171169
print(i, sample['image'].shape, sample['landmarks'].shape)
172170

173171
ax = plt.subplot(1, 4, i + 1)
@@ -268,8 +266,8 @@ def __call__(self, sample):
268266
h, w = image.shape[:2]
269267
new_h, new_w = self.output_size
270268

271-
top = np.random.randint(0, h - new_h)
272-
left = np.random.randint(0, w - new_w)
269+
top = np.random.randint(0, h - new_h + 1)
270+
left = np.random.randint(0, w - new_w + 1)
273271

274272
image = image[top: top + new_h,
275273
left: left + new_w]
@@ -294,7 +292,7 @@ def __call__(self, sample):
294292

295293
######################################################################
296294
# .. note::
297-
# In the example above, `RandomCrop` uses an external library's random number generator
295+
# In the example above, `RandomCrop` uses an external library's random number generator
298296
# (in this case, NumPy's `np.random.randint`). This can result in unexpected behavior with `DataLoader`
299297
# (see `here <https://pytorch.org/docs/stable/notes/faq.html#my-data-loader-workers-return-identical-random-numbers>`_).
300298
# In practice, it is safer to stick to PyTorch's random number generator, e.g. by using `torch.randint` instead.
@@ -356,9 +354,7 @@ def __call__(self, sample):
356354
ToTensor()
357355
]))
358356

359-
for i in range(len(transformed_dataset)):
360-
sample = transformed_dataset[i]
361-
357+
for i, sample in enumerate(transformed_dataset):
362358
print(i, sample['image'].size(), sample['landmarks'].size())
363359

364360
if i == 3:

beginner_source/former_torchies/parallelism_tutorial.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ def forward(self, x):
5353

5454
class MyDataParallel(nn.DataParallel):
5555
def __getattr__(self, name):
56-
return getattr(self.module, name)
56+
try:
57+
return super().__getattr__(name)
58+
except AttributeError:
59+
return getattr(self.module, name)
5760

5861
########################################################################
5962
# **Primitives on which DataParallel is implemented:**

beginner_source/introyt/introyt1_tutorial.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ def num_flat_features(self, x):
288288

289289
transform = transforms.Compose(
290290
[transforms.ToTensor(),
291-
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
291+
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
292292

293293

294294
##########################################################################
@@ -297,9 +297,28 @@ def num_flat_features(self, x):
297297
# - ``transforms.ToTensor()`` converts images loaded by Pillow into
298298
# PyTorch tensors.
299299
# - ``transforms.Normalize()`` adjusts the values of the tensor so
300-
# that their average is zero and their standard deviation is 0.5. Most
300+
# that their average is zero and their standard deviation is 1.0. Most
301301
# activation functions have their strongest gradients around x = 0, so
302302
# centering our data there can speed learning.
303+
# The values passed to the transform are the means (first tuple) and the
304+
# standard deviations (second tuple) of the RGB values of the images in
305+
# the dataset. You can calculate these values yourself by running these
306+
# few lines of code:
307+
# ```
308+
# from torch.utils.data import ConcatDataset
309+
# transform = transforms.Compose([transforms.ToTensor()])
310+
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
311+
# download=True, transform=transform)
312+
#
313+
# #stack all train images together into a tensor of shape
314+
# #(50000, 3, 32, 32)
315+
# x = torch.stack([sample[0] for sample in ConcatDataset([trainset])])
316+
#
317+
# #get the mean and standard deviation of each channel
318+
# mean = torch.mean(x, dim=(0,2,3)) #tensor([0.4914, 0.4822, 0.4465])
319+
# std = torch.std(x, dim=(0,2,3)) #tensor([0.2470, 0.2435, 0.2616])
320+
#
321+
# ```
303322
#
304323
# There are many more transforms available, including cropping, centering,
305324
# rotation, and reflection.

beginner_source/nn_tutorial.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,7 @@ def __len__(self):
795795
return len(self.dl)
796796

797797
def __iter__(self):
798-
batches = iter(self.dl)
799-
for b in batches:
798+
for b in self.dl:
800799
yield (self.func(*b))
801800

802801
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)

beginner_source/transfer_learning_tutorial.py

Lines changed: 66 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import matplotlib.pyplot as plt
4747
import time
4848
import os
49-
import copy
49+
from tempfile import TemporaryDirectory
5050

5151
cudnn.benchmark = True
5252
plt.ion() # interactive mode
@@ -146,67 +146,71 @@ def imshow(inp, title=None):
146146
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
147147
since = time.time()
148148

149-
best_model_wts = copy.deepcopy(model.state_dict())
150-
best_acc = 0.0
151-
152-
for epoch in range(num_epochs):
153-
print(f'Epoch {epoch}/{num_epochs - 1}')
154-
print('-' * 10)
155-
156-
# Each epoch has a training and validation phase
157-
for phase in ['train', 'val']:
158-
if phase == 'train':
159-
model.train() # Set model to training mode
160-
else:
161-
model.eval() # Set model to evaluate mode
162-
163-
running_loss = 0.0
164-
running_corrects = 0
165-
166-
# Iterate over data.
167-
for inputs, labels in dataloaders[phase]:
168-
inputs = inputs.to(device)
169-
labels = labels.to(device)
170-
171-
# zero the parameter gradients
172-
optimizer.zero_grad()
173-
174-
# forward
175-
# track history if only in train
176-
with torch.set_grad_enabled(phase == 'train'):
177-
outputs = model(inputs)
178-
_, preds = torch.max(outputs, 1)
179-
loss = criterion(outputs, labels)
180-
181-
# backward + optimize only if in training phase
182-
if phase == 'train':
183-
loss.backward()
184-
optimizer.step()
185-
186-
# statistics
187-
running_loss += loss.item() * inputs.size(0)
188-
running_corrects += torch.sum(preds == labels.data)
189-
if phase == 'train':
190-
scheduler.step()
191-
192-
epoch_loss = running_loss / dataset_sizes[phase]
193-
epoch_acc = running_corrects.double() / dataset_sizes[phase]
194-
195-
print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
196-
197-
# deep copy the model
198-
if phase == 'val' and epoch_acc > best_acc:
199-
best_acc = epoch_acc
200-
best_model_wts = copy.deepcopy(model.state_dict())
201-
202-
print()
203-
204-
time_elapsed = time.time() - since
205-
print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
206-
print(f'Best val Acc: {best_acc:4f}')
207-
208-
# load best model weights
209-
model.load_state_dict(best_model_wts)
149+
# Create a temporary directory to save training checkpoints
150+
with TemporaryDirectory() as tempdir:
151+
best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')
152+
153+
torch.save(model.state_dict(), best_model_params_path)
154+
best_acc = 0.0
155+
156+
for epoch in range(num_epochs):
157+
print(f'Epoch {epoch}/{num_epochs - 1}')
158+
print('-' * 10)
159+
160+
# Each epoch has a training and validation phase
161+
for phase in ['train', 'val']:
162+
if phase == 'train':
163+
model.train() # Set model to training mode
164+
else:
165+
model.eval() # Set model to evaluate mode
166+
167+
running_loss = 0.0
168+
running_corrects = 0
169+
170+
# Iterate over data.
171+
for inputs, labels in dataloaders[phase]:
172+
inputs = inputs.to(device)
173+
labels = labels.to(device)
174+
175+
# zero the parameter gradients
176+
optimizer.zero_grad()
177+
178+
# forward
179+
# track history if only in train
180+
with torch.set_grad_enabled(phase == 'train'):
181+
outputs = model(inputs)
182+
_, preds = torch.max(outputs, 1)
183+
loss = criterion(outputs, labels)
184+
185+
# backward + optimize only if in training phase
186+
if phase == 'train':
187+
loss.backward()
188+
optimizer.step()
189+
190+
# statistics
191+
running_loss += loss.item() * inputs.size(0)
192+
running_corrects += torch.sum(preds == labels.data)
193+
if phase == 'train':
194+
scheduler.step()
195+
196+
epoch_loss = running_loss / dataset_sizes[phase]
197+
epoch_acc = running_corrects.double() / dataset_sizes[phase]
198+
199+
print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
200+
201+
# save the parameters of the best model seen so far
202+
if phase == 'val' and epoch_acc > best_acc:
203+
best_acc = epoch_acc
204+
torch.save(model.state_dict(), best_model_params_path)
205+
206+
print()
207+
208+
time_elapsed = time.time() - since
209+
print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
210+
print(f'Best val Acc: {best_acc:4f}')
211+
212+
# load best model weights
213+
model.load_state_dict(torch.load(best_model_params_path))
210214
return model
211215

212216

intermediate_source/char_rnn_classification_tutorial.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
**************************************************************
55
**Author**: `Sean Robertson <https://github.com/spro>`_
66
7-
We will be building and training a basic character-level RNN to classify
8-
words. This tutorial, along with the following two, show how to do
9-
preprocess data for NLP modeling "from scratch", in particular not using
10-
many of the convenience functions of `torchtext`, so you can see how
11-
preprocessing for NLP modeling works at a low level.
7+
We will be building and training a basic character-level Recurrent Neural
8+
Network (RNN) to classify words. This tutorial, along with two other
9+
Natural Language Processing (NLP) "from scratch" tutorials
10+
:doc:`/intermediate/char_rnn_generation_tutorial` and
11+
:doc:`/intermediate/seq2seq_translation_tutorial`, show how to
12+
preprocess data to model NLP. In particular these tutorials do not
13+
use many of the convenience functions of `torchtext`, so you can see how
14+
preprocessing to model NLP works at a low level.
1215
1316
A character-level RNN reads words as a series of characters -
1417
outputting a prediction and "hidden state" at each step, feeding its
@@ -32,13 +35,15 @@
3235
(-2.68) Dutch
3336
3437
35-
**Recommended Reading:**
38+
Recommended Preparation
39+
=======================
3640
37-
I assume you have at least installed PyTorch, know Python, and
38-
understand Tensors:
41+
Before starting this tutorial it is recommended that you have installed PyTorch,
42+
and have a basic understanding of the Python programming language and Tensors:
3943
4044
- https://pytorch.org/ For installation instructions
4145
- :doc:`/beginner/deep_learning_60min_blitz` to get started with PyTorch in general
46+
and learn the basics of Tensors
4247
- :doc:`/beginner/pytorch_with_examples` for a wide and deep overview
4348
- :doc:`/beginner/former_torchies_tutorial` if you are a former Lua Torch user
4449
@@ -181,10 +186,6 @@ def lineToTensor(line):
181186
# is just 2 linear layers which operate on an input and hidden state, with
182187
# a ``LogSoftmax`` layer after the output.
183188
#
184-
# .. figure:: https://i.imgur.com/Z2xbySO.png
185-
# :alt:
186-
#
187-
#
188189

189190
import torch.nn as nn
190191

@@ -195,13 +196,13 @@ def __init__(self, input_size, hidden_size, output_size):
195196
self.hidden_size = hidden_size
196197

197198
self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
198-
self.i2o = nn.Linear(input_size + hidden_size, output_size)
199+
self.h2o = nn.Linear(hidden_size, output_size)
199200
self.softmax = nn.LogSoftmax(dim=1)
200201

201202
def forward(self, input, hidden):
202203
combined = torch.cat((input, hidden), 1)
203204
hidden = self.i2h(combined)
204-
output = self.i2o(combined)
205+
output = self.h2o(hidden)
205206
output = self.softmax(output)
206207
return output, hidden
207208

intermediate_source/char_rnn_generation_tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def train(category_tensor, input_line_tensor, target_line_tensor):
278278

279279
rnn.zero_grad()
280280

281-
loss = 0
281+
loss = torch.Tensor([0]) # you can also just simply use ``loss = 0``
282282

283283
for i in range(input_line_tensor.size(0)):
284284
output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)

intermediate_source/dynamic_quantization_bert_tutorial.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ model before and after the dynamic quantization.
255255
torch.manual_seed(seed)
256256
set_seed(42)
257257
258+
# Initialize a global random number generator
259+
global_rng = random.Random()
260+
258261
259262
2.2 Load the fine-tuned BERT model
260263
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -525,6 +528,21 @@ We can serialize and save the quantized model for the future use using
525528

526529
.. code:: python
527530
531+
def ids_tensor(shape, vocab_size, rng=None, name=None):
532+
# Creates a random int64 (``torch.long``) tensor of the given shape with values in [0, vocab_size - 1]
533+
if rng is None:
534+
rng = global_rng
535+
536+
total_dims = 1
537+
for dim in shape:
538+
total_dims *= dim
539+
540+
values = []
541+
for _ in range(total_dims):
542+
values.append(rng.randint(0, vocab_size - 1))
543+
544+
return torch.tensor(data=values, dtype=torch.long, device='cpu').view(shape).contiguous()
545+
528546
input_ids = ids_tensor([8, 128], 2)
529547
token_type_ids = ids_tensor([8, 128], 2)
530548
attention_mask = ids_tensor([8, 128], vocab_size=2)

prototype_source/fx_graph_mode_ptq_static.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,9 @@ Download the `torchvision resnet18 model <https://download.pytorch.org/models/re
214214
float_model = load_model(saved_model_dir + float_model_file).to("cpu")
215215
float_model.eval()
216216
217-
# deepcopy the model since we need to keep the original model around
218-
import copy
219-
model_to_quantize = copy.deepcopy(float_model)
217+
# create another instance of the model since
218+
# we need to keep the original model around
219+
model_to_quantize = load_model(saved_model_dir + float_model_file).to("cpu")
220220
221221
3. Set model to eval mode
222222
-------------------------
@@ -408,4 +408,4 @@ Running the model in AIBench (with single threading) gives the following result:
408408
409409
As we can see, for resnet18 both the FX graph mode and eager mode quantized models get a similar speedup over the floating point model,
410410
which is around 2-4x faster than the floating point model. But the actual speedup over the floating point model may vary
411-
depending on model, device, build, input batch sizes, threading etc.
411+
depending on model, device, build, input batch sizes, threading etc.

0 commit comments

Comments
 (0)