"""
What is `torch.nn` *really*?
============================
- by Jeremy Howard, `fast.ai <https://www.fast.ai>`_. Thanks to Rachel Thomas and Francisco Ingham.
+
+ **Authors:** Jeremy Howard, `fast.ai <https://www.fast.ai>`_. Thanks to Rachel Thomas and Francisco Ingham.
"""
+
###############################################################################
- # We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file,
+ # We recommend running this tutorial as a notebook, not a script. To download the notebook (``.ipynb``) file,
# click the link at the top of the page.
#
# PyTorch provides the elegantly designed modules and classes `torch.nn <https://pytorch.org/docs/stable/nn.html>`_ ,
print(y_train.min(), y_train.max())

###############################################################################
- # Neural net from scratch (no torch.nn)
+ # Neural net from scratch (without ``torch.nn``)
# ---------------------------------------------
#
# Let's first create a model using nothing but PyTorch tensor operations. We're assuming
#
# .. note:: We are initializing the weights here with
# `Xavier initialisation <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
- # (by multiplying with 1/sqrt(n)).
+ # (by multiplying with ``1/sqrt(n)``).

import math
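# For reference, the initialization described in the note is roughly the
# following sketch (the actual cells are not shown in this hunk; the 784-pixel
# flattened MNIST inputs and 10 output classes are taken from the tutorial):

import torch

weights = torch.randn(784, 10) / math.sqrt(784)  # scale by 1/sqrt(n), as in the note above
weights.requires_grad_()                         # start tracking gradients only after initialization
bias = torch.zeros(10, requires_grad=True)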
# let's just write a plain matrix multiplication and broadcasted addition
# to create a simple linear model. We also need an activation function, so
# we'll write `log_softmax` and use it. Remember: although PyTorch
- # provides lots of pre-written loss functions, activation functions, and
+ # provides lots of prewritten loss functions, activation functions, and
# so forth, you can easily write your own using plain python. PyTorch will
# even create fast GPU or vectorized CPU code for your function
# automatically.
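# For reference, the plain-tensor model described here is roughly the
# following sketch (``weights`` and ``bias`` are the tensors initialized
# above):

def log_softmax(x):
    # log-softmax over the last dimension, written with basic tensor ops
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    # a linear layer via matrix multiply + broadcasted bias, then log-softmax
    return log_softmax(xb @ weights + bias)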
@@ -242,7 +244,7 @@ def accuracy(out, yb):
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

###############################################################################
- # Using torch.nn.functional
+ # Using ``torch.nn.functional``
# ------------------------------
#
# We will now refactor our code, so that it does the same thing as before, only
@@ -278,7 +280,7 @@ def model(xb):
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

###############################################################################
- # Refactor using nn.Module
+ # Refactor using ``nn.Module``
# -----------------------------
# Next up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more
# concise training loop. We subclass ``nn.Module`` (which itself is a class and
@@ -320,22 +322,26 @@ def forward(self, xb):
###############################################################################
# Previously for our training loop we had to update the values for each parameter
# by name, and manually zero out the grads for each parameter separately, like this:
+ #
# ::
- # with torch.no_grad():
- # weights -= weights.grad * lr
- # bias -= bias.grad * lr
- # weights.grad.zero_()
- # bias.grad.zero_()
+ #
+ #    with torch.no_grad():
+ #        weights -= weights.grad * lr
+ #        bias -= bias.grad * lr
+ #        weights.grad.zero_()
+ #        bias.grad.zero_()
#
#
# Now we can take advantage of model.parameters() and model.zero_grad() (which
# are both defined by PyTorch for ``nn.Module``) to make those steps more concise
# and less prone to the error of forgetting some of our parameters, particularly
# if we had a more complicated model:
+ #
# ::
- # with torch.no_grad():
- # for p in model.parameters(): p -= p.grad * lr
- # model.zero_grad()
+ #
+ #    with torch.no_grad():
+ #        for p in model.parameters(): p -= p.grad * lr
+ #        model.zero_grad()
#
#
# We'll wrap our little training loop in a ``fit`` function so we can run it
@@ -365,8 +371,8 @@ def fit():
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using nn.Linear
- # -------------------------
+ # Refactor using ``nn.Linear``
+ # ----------------------------
#
# We continue to refactor our code. Instead of manually defining and
# initializing ``self.weights`` and ``self.bias``, and calculating ``xb @
@@ -398,23 +404,27 @@ def forward(self, xb):
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using optim
+ # Refactor using ``torch.optim``
# ------------------------------
#
# Pytorch also has a package with various optimization algorithms, ``torch.optim``.
# We can use the ``step`` method from our optimizer to take a forward step, instead
# of manually updating each parameter.
#
# This will let us replace our previous manually coded optimization step:
+ #
# ::
- # with torch.no_grad():
- # for p in model.parameters(): p -= p.grad * lr
- # model.zero_grad()
+ #
+ #    with torch.no_grad():
+ #        for p in model.parameters(): p -= p.grad * lr
+ #        model.zero_grad()
#
# and instead use just:
+ #
# ::
- # opt.step()
- # opt.zero_grad()
+ #
+ #    opt.step()
+ #    opt.zero_grad()
#
# (``optim.zero_grad()`` resets the gradient to 0 and we need to call it before
# computing the gradient for the next minibatch.)
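# Concretely, the optimizer-based training step looks roughly like this sketch
# (``Mnist_Logistic``, ``loss_func``, ``lr``, ``bs``, ``n`` and ``epochs`` are
# defined in cells not shown in this hunk):

from torch import optim

model = Mnist_Logistic()
opt = optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb = x_train[i * bs : i * bs + bs]
        yb = y_train[i * bs : i * bs + bs]
        loss = loss_func(model(xb), yb)

        loss.backward()
        opt.step()       # update every parameter registered with the optimizer
        opt.zero_grad()  # reset gradients before the next minibatch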
@@ -473,15 +483,19 @@ def get_model():
train_ds = TensorDataset(x_train, y_train)

###############################################################################
- # Previously, we had to iterate through minibatches of x and y values separately:
+ # Previously, we had to iterate through minibatches of ``x`` and ``y`` values separately:
+ #
# ::
- # xb = x_train[start_i:end_i]
- # yb = y_train[start_i:end_i]
+ #
+ #    xb = x_train[start_i:end_i]
+ #    yb = y_train[start_i:end_i]
#
#
# Now, we can do these two steps together:
+ #
# ::
- # xb,yb = train_ds[i*bs : i*bs+bs]
+ #
+ #    xb,yb = train_ds[i*bs : i*bs+bs]
#
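# A quick way to see the behavior described above: indexing a ``TensorDataset``
# returns a tuple of (input, target) slices (the shapes shown assume the
# ``bs = 64`` and 784-pixel inputs used earlier in the tutorial):

xb, yb = train_ds[0:bs]
print(xb.shape, yb.shape)  # torch.Size([64, 784]) torch.Size([64])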

model, opt = get_model()
@@ -499,30 +513,34 @@ def get_model():
print(loss_func(model(xb), yb))

###############################################################################
- # Refactor using DataLoader
+ # Refactor using ``DataLoader``
# ------------------------------
#
- # Pytorch's ``DataLoader`` is responsible for managing batches. You can
+ # PyTorch's ``DataLoader`` is responsible for managing batches. You can
# create a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier
# to iterate over batches. Rather than having to use ``train_ds[i*bs : i*bs+bs]``,
- # the DataLoader gives us each minibatch automatically.
+ # the ``DataLoader`` gives us each minibatch automatically.

from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

###############################################################################
- # Previously, our loop iterated over batches (xb, yb) like this:
+ # Previously, our loop iterated over batches ``(xb, yb)`` like this:
+ #
# ::
- # for i in range((n-1)//bs + 1):
- # xb,yb = train_ds[i*bs : i*bs+bs]
- # pred = model(xb)
#
- # Now, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader:
+ #    for i in range((n-1)//bs + 1):
+ #        xb,yb = train_ds[i*bs : i*bs+bs]
+ #        pred = model(xb)
+ #
+ # Now, our loop is much cleaner, as ``(xb, yb)`` are loaded automatically from the data loader:
+ #
# ::
- # for xb,yb in train_dl:
- # pred = model(xb)
+ #
+ #    for xb,yb in train_dl:
+ #        pred = model(xb)

model, opt = get_model()
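# The loop that follows (elided by the next hunk) is roughly this sketch, with
# ``get_model``, ``loss_func`` and ``epochs`` as defined earlier:

for epoch in range(epochs):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()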

@@ -538,7 +556,7 @@ def get_model():
print(loss_func(model(xb), yb))

###############################################################################
- # Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,
+ # Thanks to PyTorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,
# our training loop is now dramatically smaller and easier to understand. Let's
# now try to add the basic features necessary to create effective models in practice.
#
@@ -573,7 +591,7 @@ def get_model():
#
# (Note that we always call ``model.train()`` before training, and ``model.eval()``
# before inference, because these are used by layers such as ``nn.BatchNorm2d``
- # and ``nn.Dropout`` to ensure appropriate behaviour for these different phases.)
+ # and ``nn.Dropout`` to ensure appropriate behavior for these different phases.)

model, opt = get_model()
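# The training/validation loop that follows (elided by the next hunk) is
# roughly this sketch, assuming the ``valid_dl`` validation loader built in
# the surrounding cells:

for epoch in range(epochs):
    model.train()              # enable training-time behavior (dropout, batchnorm updates)
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()               # switch layers to inference behavior
    with torch.no_grad():      # no gradients needed for validation
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_dl))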

@@ -667,11 +685,11 @@ def get_data(train_ds, valid_ds, bs):
# Because none of the functions in the previous section assume anything about
# the model form, we'll be able to use them to train a CNN without any modification.
#
- # We will use Pytorch's predefined
+ # We will use PyTorch's predefined
# `Conv2d <https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d>`_ class
# as our convolutional layer. We define a CNN with 3 convolutional layers.
# Each convolution is followed by a ReLU. At the end, we perform an
- # average pooling. (Note that ``view`` is PyTorch's version of numpy's
+ # average pooling. (Note that ``view`` is PyTorch's version of Numpy's
# ``reshape``)

class Mnist_CNN(nn.Module):
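    # The body is elided by this diff hunk; this is a sketch consistent with the
    # description above (three convolutions, each followed by ReLU, then average
    # pooling). The exact channel counts and strides are assumptions, and
    # ``torch.nn.functional`` is assumed to be imported as ``F``.
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)      # reshape flat 784-vectors into 1x28x28 images
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)         # average-pool the final 4x4 feature map
        return xb.view(-1, xb.size(1))   # one score per class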
@@ -702,7 +720,7 @@ def forward(self, xb):
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

###############################################################################
- # nn.Sequential
+ # Using ``nn.Sequential``
# ------------------------
#
# ``torch.nn`` has another handy class we can use to simplify our code:
@@ -729,7 +747,7 @@ def preprocess(x):
    return x.view(-1, 1, 28, 28)

###############################################################################
- # The model created with ``Sequential`` is simply:
+ # The model created with ``Sequential`` is simple:

model = nn.Sequential(
    Lambda(preprocess),
@@ -748,7 +766,7 @@ def preprocess(x):
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

###############################################################################
- # Wrapping DataLoader
+ # Wrapping ``DataLoader``
# -----------------------------
#
# Our CNN is fairly concise, but it only works with MNIST, because:
@@ -862,7 +880,7 @@ def preprocess(x, y):
# ``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize
# what we've seen:
#
- # - **torch.nn**
+ # - ``torch.nn``:
#
#   + ``Module``: creates a callable which behaves like a function, but can also
#     contain state (such as neural net layer weights). It knows what ``Parameter`` (s) it
0 commit comments