
Commit 920e97a

Merge branch 'master' into quant-fix
2 parents 0f7c71b + 250f741 commit 920e97a

6 files changed: +23 / -21 lines


beginner_source/basics/optimization_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ def forward(self, x):
 #####################################
 # Inside the training loop, optimization happens in three steps:
 # * Call ``optimizer.zero_grad()`` to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.
-# * Backpropagate the prediction loss with a call to ``loss.backwards()``. PyTorch deposits the gradients of the loss w.r.t. each parameter.
+# * Backpropagate the prediction loss with a call to ``loss.backward()``. PyTorch deposits the gradients of the loss w.r.t. each parameter.
 # * Once we have our gradients, we call ``optimizer.step()`` to adjust the parameters by the gradients collected in the backward pass.
 
 
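
As a point of reference, a minimal self-contained sketch of the three-step pattern the corrected comment describes; the model, loss function and data below are illustrative placeholders, not the tutorial's own objects.

import torch

# illustrative placeholders (not the tutorial's model/data)
model = torch.nn.Linear(28 * 28, 10)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

X = torch.randn(8, 28 * 28)        # a fake mini-batch of 8 flattened images
y = torch.randint(0, 10, (8,))     # fake integer class labels

optimizer.zero_grad()              # 1. reset parameter gradients (they accumulate by default)
loss = loss_fn(model(X), y)        # forward pass and loss
loss.backward()                    # 2. backpropagate: fills each parameter's .grad
optimizer.step()                   # 3. adjust parameters using the collected gradients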

beginner_source/blitz/autograd_tutorial.py

Lines changed: 8 additions & 3 deletions
@@ -32,11 +32,16 @@
 
 
 Usage in PyTorch
-~~~~~~~~~~~
+~~~~~~~~~~~~~~~~
 Let's take a look at a single training step.
 For this example, we load a pretrained resnet18 model from ``torchvision``.
 We create a random data tensor to represent a single image with 3 channels, and height & width of 64,
-and its corresponding ``label`` initialized to some random values.
+and its corresponding ``label`` initialized to some random values. Label in pretrained models has
+shape (1,1000).
+
+.. note::
+   This tutorial works only on CPU and will not work on GPU (even if tensors are moved to CUDA).
+
 """
 import torch, torchvision
 model = torchvision.models.resnet18(pretrained=True)
@@ -61,7 +66,7 @@
 loss.backward() # backward pass
 
 ############################################################
-# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and momentum of 0.9.
+# Next, we load an optimizer, in this case SGD with a learning rate of 0.01 and `momentum <https://towardsdatascience.com/stochastic-gradient-descent-with-momentum-a84097641a5d>`__ of 0.9.
 # We register all the parameters of the model in the optimizer.
 #
 
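
Pulled together from the surrounding context, a runnable sketch of the single training step this hunk documents; ``pretrained=True`` downloads the resnet18 weights on first use, and the random ``data``/``labels`` tensors stand in for a real image and target.

import torch, torchvision

model = torchvision.models.resnet18(pretrained=True)
data = torch.rand(1, 3, 64, 64)      # one random "image": 3 channels, height and width of 64
labels = torch.rand(1, 1000)         # random target with shape (1, 1000)

prediction = model(data)             # forward pass
loss = (prediction - labels).sum()
loss.backward()                      # backward pass: autograd deposits gradients in each parameter's .grad

optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
optim.step()                         # apply SGD with momentum to every registered parameter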

beginner_source/blitz/cifar10_tutorial.py

Lines changed: 11 additions & 14 deletions
@@ -110,7 +110,7 @@ def imshow(img):
 # show images
 imshow(torchvision.utils.make_grid(images))
 # print labels
-print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))
+print(' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
 
 
 ########################################################################
@@ -182,8 +182,7 @@ def forward(self, x):
         # print statistics
         running_loss += loss.item()
         if i % 2000 == 1999:    # print every 2000 mini-batches
-            print('[%d, %5d] loss: %.3f' %
-                  (epoch + 1, i + 1, running_loss / 2000))
+            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
             running_loss = 0.0
 
 print('Finished Training')
@@ -215,7 +214,7 @@ def forward(self, x):
 
 # print images
 imshow(torchvision.utils.make_grid(images))
-print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
+print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(4)))
 
 ########################################################################
 # Next, let's load back in our saved model (note: saving and re-loading the model
@@ -236,7 +235,7 @@ def forward(self, x):
 # So, let's get the index of the highest energy:
 _, predicted = torch.max(outputs, 1)
 
-print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
+print('Predicted: ', ' '.join(f'{classes[predicted[j]]:5s}'
                               for j in range(4)))
 
 ########################################################################
@@ -250,15 +249,14 @@ def forward(self, x):
 with torch.no_grad():
     for data in testloader:
         images, labels = data
-            # calculate outputs by running images through the network
+        # calculate outputs by running images through the network
         outputs = net(images)
         # the class with the highest energy is what we choose as prediction
         _, predicted = torch.max(outputs.data, 1)
         total += labels.size(0)
         correct += (predicted == labels).sum().item()
 
-print('Accuracy of the network on the 10000 test images: %d %%' % (
-    100 * correct / total))
+print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')
 
 ########################################################################
 # That looks way better than chance, which is 10% accuracy (randomly picking
@@ -275,21 +273,20 @@ def forward(self, x):
 # again no gradients needed
 with torch.no_grad():
     for data in testloader:
-            images, labels = data
-            outputs = net(images)
+        images, labels = data
+        outputs = net(images)
         _, predictions = torch.max(outputs, 1)
         # collect the correct predictions for each class
         for label, prediction in zip(labels, predictions):
             if label == prediction:
                 correct_pred[classes[label]] += 1
             total_pred[classes[label]] += 1
 
-
+
 # print accuracy for each class
 for classname, correct_count in correct_pred.items():
     accuracy = 100 * float(correct_count) / total_pred[classname]
-    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
-                                                         accuracy))
+    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
 
 ########################################################################
 # Okay, so what next?
@@ -304,7 +301,7 @@ def forward(self, x):
 # Let's first define our device as the first visible cuda device if we have
 # CUDA available:
 
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 
 # Assuming that we are on a CUDA machine, this should print a CUDA device:
 
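
For the device-selection change in the last hunk, a small sketch of the usual pattern; ``net``, ``images`` and ``labels`` here are stand-ins for the tutorial's network and test batch.

import torch

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)                                          # a CUDA device on a GPU machine, otherwise "cpu"

# stand-ins for the tutorial's objects
net = torch.nn.Linear(3 * 32 * 32, 10).to(device)     # .to(device) moves parameters and buffers
images = torch.randn(4, 3 * 32 * 32).to(device)       # inputs must be moved to the same device
labels = torch.randint(0, 10, (4,)).to(device)

with torch.no_grad():
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == labels).sum().item()
    print(f'Accuracy on this batch: {100 * correct // labels.size(0)} %')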

beginner_source/dcgan_faces_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -319,7 +319,7 @@ def weights_init(m):
 # .. figure:: /_static/img/dcgan_generator.png
 #    :alt: dcgan_generator
 #
-# Notice, the how the inputs we set in the input section (*nz*, *ngf*, and
+# Notice, how the inputs we set in the input section (*nz*, *ngf*, and
 # *nc*) influence the generator architecture in code. *nz* is the length
 # of the z input vector, *ngf* relates to the size of the feature maps
 # that are propagated through the generator, and *nc* is the number of
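
To make the role of *nz*, *ngf* and *nc* concrete, a trimmed two-layer sketch of a DCGAN-style generator; the tutorial's actual Generator class has several intermediate layers, so this is only an illustration.

import torch
import torch.nn as nn

nz, ngf, nc = 100, 64, 3     # latent vector length, generator feature-map size, image channels

gen = nn.Sequential(
    # z of shape (N, nz, 1, 1) enters here, so nz only shapes the first layer
    nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
    nn.BatchNorm2d(ngf * 8),
    nn.ReLU(True),
    # ... the full generator repeats this block, halving the ngf multiple at each stage ...
    nn.ConvTranspose2d(ngf * 8, nc, 4, 2, 1, bias=False),   # nc fixes the output channel count
    nn.Tanh(),
)

fake = gen(torch.randn(1, nz, 1, 1))
print(fake.shape)    # torch.Size([1, 3, 8, 8]) for this trimmed stack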

beginner_source/examples_nn/dynamic_net.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 To showcase the power of PyTorch dynamic graphs, we will implement a very strange
 model: a third-fifth order polynomial that on each forward pass
-chooses a random number between 3 and 5 and uses that many orders, reusing
+chooses a random number between 4 and 5 and uses that many orders, reusing
 the same weights multiple times to compute the fourth and fifth order.
 """
 import random
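
A compact sketch of the dynamic behaviour the corrected docstring describes: each forward pass draws the highest order (4 or 5) at random and reuses a single extra weight for those terms. The class below mirrors the tutorial's idea but is written out here only as an illustration.

import random
import torch

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # one parameter per fixed order, plus `e`, reused for the random higher orders
        self.a, self.b, self.c, self.d, self.e = (
            torch.nn.Parameter(torch.randn(())) for _ in range(5)
        )

    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        top = random.randint(4, 5)        # the graph is rebuilt on every call, so this can differ per pass
        for exp in range(4, top + 1):
            y = y + self.e * x ** exp     # the same weight `e` serves every extra order
        return y

y = DynamicNet()(torch.linspace(-1, 1, steps=20))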

beginner_source/nlp/sequence_models_tutorial.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 
 A recurrent neural network is a network that maintains some kind of
 state. For example, its output could be used as part of the next input,
-so that information can propogate along as the network passes over the
+so that information can propagate along as the network passes over the
 sequence. In the case of an LSTM, for each element in the sequence,
 there is a corresponding *hidden state* :math:`h_t`, which in principle
 can contain information from arbitrary points earlier in the sequence.
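
As a quick illustration of the corrected sentence, a minimal nn.LSTM example showing the per-element hidden states h_t; the dimensions are chosen arbitrarily, not taken from the tutorial.

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=3, hidden_size=3)    # input dim 3, hidden dim 3
inputs = torch.randn(5, 1, 3)                  # a sequence of length 5, batch size 1
h0, c0 = torch.zeros(1, 1, 3), torch.zeros(1, 1, 3)

out, (hn, cn) = lstm(inputs, (h0, c0))
print(out.shape)   # (5, 1, 3): the hidden state h_t for every element of the sequence
print(hn.shape)    # (1, 1, 3): the hidden state after the final element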
