"""
What is `torch.nn` *really*?
============================
- by Jeremy Howard, `fast.ai <https://www.fast.ai>`_. Thanks to Rachel Thomas and Francisco Ingham.
+
+ **Authors:** Jeremy Howard, `fast.ai <https://www.fast.ai>`_. Thanks to Rachel Thomas and Francisco Ingham.
"""
+
###############################################################################
- # We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file,
+ # We recommend running this tutorial as a notebook, not a script. To download the notebook (``.ipynb``) file,
# click the link at the top of the page.
#
# PyTorch provides the elegantly designed modules and classes `torch.nn <https://pytorch.org/docs/stable/nn.html>`_ ,
print(y_train.min(), y_train.max())

###############################################################################
- # Neural net from scratch (no torch.nn)
+ # Neural net from scratch (without ``torch.nn``)
# ---------------------------------------------
#
# Let's first create a model using nothing but PyTorch tensor operations. We're assuming
#
# .. note:: We are initializing the weights here with
# `Xavier initialisation <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
- # (by multiplying with 1/sqrt(n)).
+ # (by multiplying with ``1/sqrt(n)``).

import math
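# For reference, the initialization described in the note is roughly the
# following sketch (the actual cells are not shown in this hunk; the 784-pixel
# flattened MNIST inputs and 10 output classes are taken from the tutorial):

import torch

weights = torch.randn(784, 10) / math.sqrt(784)  # scale by 1/sqrt(n), as in the note above
weights.requires_grad_()                         # start tracking gradients only after initialization
bias = torch.zeros(10, requires_grad=True)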
# let's just write a plain matrix multiplication and broadcasted addition
# to create a simple linear model. We also need an activation function, so
# we'll write `log_softmax` and use it. Remember: although PyTorch
- # provides lots of pre-written loss functions, activation functions, and
+ # provides lots of prewritten loss functions, activation functions, and
# so forth, you can easily write your own using plain python. PyTorch will
# even create fast GPU or vectorized CPU code for your function
# automatically.
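# For reference, the plain-tensor model described here is roughly the
# following sketch (``weights`` and ``bias`` are the tensors initialized
# above):

def log_softmax(x):
    # log-softmax over the last dimension, written with basic tensor ops
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    # a linear layer via matrix multiply + broadcasted bias, then log-softmax
    return log_softmax(xb @ weights + bias)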
@@ -242,7 +244,7 @@ def accuracy(out, yb):
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

###############################################################################
- # Using torch.nn.functional
+ # Using ``torch.nn.functional``
# ------------------------------
#
# We will now refactor our code, so that it does the same thing as before, only
@@ -278,7 +280,7 @@ def model(xb):
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

###############################################################################
- # Refactor using nn.Module
+ # Refactor using ``nn.Module``
# -----------------------------
# Next up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more
# concise training loop. We subclass ``nn.Module`` (which itself is a class and
@@ -320,22 +322,26 @@ def forward(self, xb):
###############################################################################
# Previously for our training loop we had to update the values for each parameter
# by name, and manually zero out the grads for each parameter separately, like this:
+ #
# ::
- # with torch.no_grad():
- # weights -= weights.grad * lr
- # bias -= bias.grad * lr
- # weights.grad.zero_()
- # bias.grad.zero_()
+ #
+ #    with torch.no_grad():
+ #        weights -= weights.grad * lr
+ #        bias -= bias.grad * lr
+ #        weights.grad.zero_()
+ #        bias.grad.zero_()
#
#
# Now we can take advantage of model.parameters() and model.zero_grad() (which
# are both defined by PyTorch for ``nn.Module``) to make those steps more concise
# and less prone to the error of forgetting some of our parameters, particularly
# if we had a more complicated model:
+ #
# ::
- # with torch.no_grad():
- # for p in model.parameters(): p -= p.grad * lr
- # model.zero_grad()
+ #
+ #    with torch.no_grad():
+ #        for p in model.parameters(): p -= p.grad * lr
+ #        model.zero_grad()
#
#
# We'll wrap our little training loop in a ``fit`` function so we can run it
@@ -365,8 +371,8 @@ def fit():
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using nn.Linear
- # -------------------------
+ # Refactor using ``nn.Linear``
+ # ----------------------------
#
# We continue to refactor our code. Instead of manually defining and
# initializing ``self.weights`` and ``self.bias``, and calculating ``xb @
@@ -398,23 +404,27 @@ def forward(self, xb):
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using optim
+ # Refactor using ``torch.optim``
# ------------------------------
#
# Pytorch also has a package with various optimization algorithms, ``torch.optim``.
# We can use the ``step`` method from our optimizer to take a forward step, instead
# of manually updating each parameter.
#
# This will let us replace our previous manually coded optimization step:
+ #
# ::
- # with torch.no_grad():
- # for p in model.parameters(): p -= p.grad * lr
- # model.zero_grad()
+ #
+ #    with torch.no_grad():
+ #        for p in model.parameters(): p -= p.grad * lr
+ #        model.zero_grad()
#
# and instead use just:
+ #
# ::
- # opt.step()
- # opt.zero_grad()
+ #
+ #    opt.step()
+ #    opt.zero_grad()
#
# (``optim.zero_grad()`` resets the gradient to 0 and we need to call it before
# computing the gradient for the next minibatch.)
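# Concretely, the optimizer-based training step looks roughly like this sketch
# (``Mnist_Logistic``, ``loss_func``, ``lr``, ``bs``, ``n`` and ``epochs`` are
# defined in cells not shown in this hunk):

from torch import optim

model = Mnist_Logistic()
opt = optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb = x_train[i * bs : i * bs + bs]
        yb = y_train[i * bs : i * bs + bs]
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()       # update every parameter registered with the optimizer
        opt.zero_grad()  # reset gradients before the next minibatch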
@@ -473,15 +483,19 @@ def get_model():
train_ds = TensorDataset(x_train, y_train)

###############################################################################
- # Previously, we had to iterate through minibatches of x and y values separately:
+ # Previously, we had to iterate through minibatches of ``x`` and ``y`` values separately:
+ #
# ::
- # xb = x_train[start_i:end_i]
- # yb = y_train[start_i:end_i]
+ #
+ #    xb = x_train[start_i:end_i]
+ #    yb = y_train[start_i:end_i]
#
#
# Now, we can do these two steps together:
+ #
# ::
- # xb,yb = train_ds[i*bs : i*bs+bs]
+ #
+ #    xb,yb = train_ds[i*bs : i*bs+bs]
#
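# A quick way to see the behavior described above: indexing a ``TensorDataset``
# returns a tuple of (input, target) slices (the shapes shown assume the
# ``bs = 64`` and 784-pixel inputs used earlier in the tutorial):

xb, yb = train_ds[0:bs]
print(xb.shape, yb.shape)  # torch.Size([64, 784]) torch.Size([64])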

model, opt = get_model()
@@ -499,30 +513,34 @@ def get_model():
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using DataLoader
+ # Refactor using ``DataLoader``
# ------------------------------
#
- # Pytorch's ``DataLoader`` is responsible for managing batches. You can
+ # PyTorch's ``DataLoader`` is responsible for managing batches. You can
# create a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier
# to iterate over batches. Rather than having to use ``train_ds[i*bs : i*bs+bs]``,
- # the DataLoader gives us each minibatch automatically.
+ # the ``DataLoader`` gives us each minibatch automatically.

from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

###############################################################################
- # Previously, our loop iterated over batches (xb, yb) like this:
+ # Previously, our loop iterated over batches ``(xb, yb)`` like this:
+ #
# ::
- # for i in range((n-1)//bs + 1):
- # xb,yb = train_ds[i*bs : i*bs+bs]
- # pred = model(xb)
#
- # Now, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader:
+ #    for i in range((n-1)//bs + 1):
+ #        xb,yb = train_ds[i*bs : i*bs+bs]
+ #        pred = model(xb)
+ #
+ # Now, our loop is much cleaner, as ``(xb, yb)`` are loaded automatically from the data loader:
+ #
# ::
- # for xb,yb in train_dl:
- # pred = model(xb)
+ #
+ #    for xb,yb in train_dl:
+ #        pred = model(xb)

model, opt = get_model()
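# The loop that follows (elided by the next hunk) is roughly this sketch, with
# ``get_model``, ``loss_func`` and ``epochs`` as defined earlier:

for epoch in range(epochs):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()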

@@ -538,7 +556,7 @@ def get_model():
print(loss_func(model(xb), yb))

###############################################################################
- # Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,
+ # Thanks to PyTorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,
# our training loop is now dramatically smaller and easier to understand. Let's
# now try to add the basic features necessary to create effective models in practice.
#
@@ -573,7 +591,7 @@ def get_model():
#
# (Note that we always call ``model.train()`` before training, and ``model.eval()``
# before inference, because these are used by layers such as ``nn.BatchNorm2d``
- # and ``nn.Dropout`` to ensure appropriate behaviour for these different phases.)
+ # and ``nn.Dropout`` to ensure appropriate behavior for these different phases.)

model, opt = get_model()
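# The training/validation loop that follows (elided by the next hunk) is
# roughly this sketch, assuming the ``valid_dl`` validation loader built in
# the surrounding cells:

for epoch in range(epochs):
    model.train()              # enable training-time behavior (dropout, batchnorm updates)
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()               # switch layers to inference behavior
    with torch.no_grad():      # no gradients needed for validation
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_dl))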

@@ -667,11 +685,11 @@ def get_data(train_ds, valid_ds, bs):
# Because none of the functions in the previous section assume anything about
# the model form, we'll be able to use them to train a CNN without any modification.
#
- # We will use Pytorch's predefined
+ # We will use PyTorch's predefined
# `Conv2d <https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d>`_ class
# as our convolutional layer. We define a CNN with 3 convolutional layers.
# Each convolution is followed by a ReLU. At the end, we perform an
- # average pooling. (Note that ``view`` is PyTorch's version of numpy's
+ # average pooling. (Note that ``view`` is PyTorch's version of Numpy's
# ``reshape``)

class Mnist_CNN(nn.Module):
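    # The body is elided by this diff hunk; this is a sketch consistent with the
    # description above (three convolutions, each followed by ReLU, then average
    # pooling). The exact channel counts and strides are assumptions, and
    # ``torch.nn.functional`` is assumed to be imported as ``F``.
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)      # reshape flat 784-vectors into 1x28x28 images
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)         # average-pool the final 4x4 feature map
        return xb.view(-1, xb.size(1))   # one score per class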
@@ -702,7 +720,7 @@ def forward(self, xb):
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

###############################################################################
- # nn.Sequential
+ # Using ``nn.Sequential``
# ------------------------
#
# ``torch.nn`` has another handy class we can use to simplify our code:
@@ -729,7 +747,7 @@ def preprocess(x):
    return x.view(-1, 1, 28, 28)

###############################################################################
- # The model created with ``Sequential`` is simply:
+ # The model created with ``Sequential`` is simple:

model = nn.Sequential(
    Lambda(preprocess),
@@ -748,7 +766,7 @@ def preprocess(x):
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

###############################################################################
- # Wrapping DataLoader
+ # Wrapping ``DataLoader``
# -----------------------------
#
# Our CNN is fairly concise, but it only works with MNIST, because:
@@ -862,7 +880,7 @@ def preprocess(x, y):
# ``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize
# what we've seen:
#
- # - **torch.nn**
+ # - ``torch.nn``:
#
#   + ``Module``: creates a callable which behaves like a function, but can also
#     contain state (such as neural net layer weights). It knows what ``Parameter`` (s) it
0 commit comments