diff --git a/.pyspelling.yml b/.pyspelling.yml
index ffe9f469d03..9c9b18800cc 100644
--- a/.pyspelling.yml
+++ b/.pyspelling.yml
@@ -4,6 +4,7 @@ matrix:
   sources:
   - beginner_source/*.py
   - intermediate_source/*.py
+  - advanced_source/*.py
   dictionary:
     wordlists:
     - en-wordlist.txt
diff --git a/advanced_source/ddp_pipeline.py b/advanced_source/ddp_pipeline.py
index 67040532194..1eb956a7836 100644
--- a/advanced_source/ddp_pipeline.py
+++ b/advanced_source/ddp_pipeline.py
@@ -75,7 +75,7 @@ def forward(self, x):
 # As a result, our focus is on ``nn.TransformerEncoder`` and we split the model
 # such that half of the ``nn.TransformerEncoderLayer`` are on one GPU and the
 # other half are on another. To do this, we pull out the ``Encoder`` and
-# ``Decoder`` sections into seperate modules and then build an nn.Sequential
+# ``Decoder`` sections into separate modules and then build an ``nn.Sequential``
 # representing the original Transformer module.
@@ -151,16 +151,17 @@ def run_worker(rank, world_size):
 # length 6:
 #
 # .. math::
-# \begin{bmatrix}
-# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z}
-# \end{bmatrix}
-# \Rightarrow
-# \begin{bmatrix}
-# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} &
-# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} &
-# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} &
-# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix}
-# \end{bmatrix}
+#
+#    \begin{bmatrix}
+#    \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z}
+#    \end{bmatrix}
+#    \Rightarrow
+#    \begin{bmatrix}
+#    \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} &
+#    \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} &
+#    \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} &
+#    \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix}
+#    \end{bmatrix}
 #
 # These columns are treated as independent by the model, which means that
 # the dependence of ``G`` and ``F`` can not be learned, but allows more
@@ -192,11 +193,11 @@ def data_process(raw_text_iter):
     device = torch.device(2 * rank)

     def batchify(data, bsz, rank, world_size, is_train=False):
-        # Divide the dataset into bsz parts.
+        # Divide the dataset into ``bsz`` parts.
         nbatch = data.size(0) // bsz
         # Trim off any extra elements that wouldn't cleanly fit (remainders).
         data = data.narrow(0, 0, nbatch * bsz)
-        # Evenly divide the data across the bsz batches.
+        # Evenly divide the data across the ``bsz`` batches.
         data = data.view(bsz, -1).t().contiguous()
         # Divide the data across the ranks only for training data.
         if is_train:
@@ -261,14 +262,14 @@ def get_batch(source, i):
 #
 # The pipeline is then initialized with 8 transformer layers on one GPU and 8
 # transformer layers on the other GPU. One pipe is setup across GPUs 0 and 1 and
-# another across GPUs 2 and 3. Both pipes are then replicated using DistributedDataParallel.
+# another across GPUs 2 and 3. Both pipes are then replicated using ``DistributedDataParallel``.
 # In 'run_worker'
     ntokens = len(vocab) # the size of vocabulary
     emsize = 4096 # embedding dimension
-    nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder
-    nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder
-    nhead = 16 # the number of heads in the multiheadattention models
+    nhid = 4096 # the dimension of the feedforward network model in ``nn.TransformerEncoder``
+    nlayers = 8 # the number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder``
+    nhead = 16 # the number of heads in the Multihead Attention models
     dropout = 0.2 # the dropout value

     from torch.distributed import rpc
@@ -287,7 +288,7 @@ def get_batch(source, i):
             )
         )

-    # Num gpus for model parallelism.
+    # Number of GPUs for model parallelism.
     num_gpus = 2
     partition_len = ((nlayers - 1) // num_gpus) + 1
diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py
index 571c0e4a831..9cc07a1d956 100644
--- a/advanced_source/dynamic_quantization_tutorial.py
+++ b/advanced_source/dynamic_quantization_tutorial.py
@@ -130,12 +130,12 @@ def tokenize(self, path):
 corpus = Corpus(model_data_filepath + 'wikitext-2')

 ######################################################################
-# 3. Load the pre-trained model
+# 3. Load the pretrained model
 # -----------------------------
 #
 # This is a tutorial on dynamic quantization, a quantization technique
 # that is applied after a model has been trained. Therefore, we'll simply load some
-# pre-trained weights into this model architecture; these weights were obtained
+# pretrained weights into this model architecture; these weights were obtained
 # by training for five epochs using the default settings in the word language model
 # example.
@@ -159,7 +159,7 @@ def tokenize(self, path):
 print(model)

 ######################################################################
-# Now let's generate some text to ensure that the pre-trained model is working
+# Now let's generate some text to ensure that the pretrained model is working
 # properly - similarly to before, we follow
 # `here `_
@@ -200,11 +200,11 @@ def tokenize(self, path):

 # create test data set
 def batchify(data, bsz):
-    # Work out how cleanly we can divide the dataset into bsz parts.
+    # Work out how cleanly we can divide the dataset into ``bsz`` parts.
     nbatch = data.size(0) // bsz
     # Trim off any extra elements that wouldn't cleanly fit (remainders).
     data = data.narrow(0, 0, nbatch * bsz)
-    # Evenly divide the data across the bsz batches.
+    # Evenly divide the data across the ``bsz`` batches.
     return data.view(bsz, -1).t().contiguous()

 test_data = batchify(corpus.test, eval_batch_size)
diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py
index 099cb330859..3d84fc508bc 100644
--- a/advanced_source/neural_style_tutorial.py
+++ b/advanced_source/neural_style_tutorial.py
@@ -44,7 +44,7 @@
 # -  ``PIL``, ``PIL.Image``, ``matplotlib.pyplot`` (load and display
 #    images)
 # -  ``torchvision.transforms`` (transform PIL images into tensors)
-# -  ``torchvision.models`` (train or load pre-trained models)
+# -  ``torchvision.models`` (train or load pretrained models)
 # -  ``copy`` (to deep copy the models; system package)

 from __future__ import print_function
@@ -84,7 +84,7 @@
 # torch library are trained with tensor values ranging from 0 to 1. If you
 # try to feed the networks with 0 to 255 tensor images, then the activated
 # feature maps will be unable to sense the intended content and style.
-# However, pre-trained networks from the Caffe library are trained with 0
+# However, pretrained networks from the Caffe library are trained with 0
 # to 255 tensor images.
 #
 #
@@ -96,7 +96,7 @@
 # with name ``images`` in your current working directory.

 # desired size of the output image
-imsize = 512 if torch.cuda.is_available() else 128  # use small size if no gpu
+imsize = 512 if torch.cuda.is_available() else 128  # use small size if no GPU

 loader = transforms.Compose([
     transforms.Resize(imsize),  # scale imported image
@@ -220,7 +220,7 @@ def gram_matrix(input):
     # b=number of feature maps
     # (c,d)=dimensions of a f. map (N=c*d)

-    features = input.view(a * b, c * d)  # resise F_XL into \hat F_XL
+    features = input.view(a * b, c * d)  # resize F_XL into \hat F_XL

     G = torch.mm(features, features.t())  # compute the gram product
@@ -251,7 +251,7 @@ def forward(self, input):
 # Importing the Model
 # -------------------
 #
-# Now we need to import a pre-trained neural network. We will use a 19
+# Now we need to import a pretrained neural network. We will use a 19
 # layer VGG network like the one used in the paper.
 #
 # PyTorch’s implementation of VGG is a module divided into two child
@@ -277,7 +277,7 @@ def forward(self, input):
 cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

 # create a module to normalize input image so we can easily put it in a
-# nn.Sequential
+# ``nn.Sequential``
 class Normalization(nn.Module):
     def __init__(self, mean, std):
         super(Normalization, self).__init__()
@@ -288,14 +288,14 @@ def __init__(self, mean, std):
         self.std = torch.tensor(std).view(-1, 1, 1)

     def forward(self, img):
-        # normalize img
+        # normalize ``img``
         return (img - self.mean) / self.std

 ######################################################################
 # A ``Sequential`` module contains an ordered list of child modules. For
-# instance, ``vgg19.features`` contains a sequence (Conv2d, ReLU, MaxPool2d,
-# Conv2d, ReLU…) aligned in the right order of depth. We need to add our
+# instance, ``vgg19.features`` contains a sequence (``Conv2d``, ``ReLU``, ``MaxPool2d``,
+# ``Conv2d``, ``ReLU``…) aligned in the right order of depth. We need to add our
 # content loss and style loss layers immediately after the convolution
 # layer they are detecting. To do this we must create a new ``Sequential``
 # module that has content loss and style loss modules correctly inserted.
@@ -312,12 +312,12 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
     # normalization module
     normalization = Normalization(normalization_mean, normalization_std).to(device)

-    # just in order to have an iterable access to or list of content/syle
+    # just in order to have an iterable access to or list of content/style
     # losses
     content_losses = []
     style_losses = []

-    # assuming that cnn is a nn.Sequential, so we make a new nn.Sequential
+    # assuming that ``cnn`` is a ``nn.Sequential``, so we make a new ``nn.Sequential``
     # to put in modules that are supposed to be activated sequentially
     model = nn.Sequential(normalization)
@@ -328,8 +328,8 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
             name = 'conv_{}'.format(i)
         elif isinstance(layer, nn.ReLU):
             name = 'relu_{}'.format(i)
-            # The in-place version doesn't play very nicely with the ContentLoss
-            # and StyleLoss we insert below. So we replace with out-of-place
+            # The in-place version doesn't play very nicely with the ``ContentLoss``
+            # and ``StyleLoss`` we insert below. So we replace with out-of-place
             # ones here.
             layer = nn.ReLU(inplace=False)
         elif isinstance(layer, nn.MaxPool2d):
@@ -371,8 +371,11 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 #
 input_img = content_img.clone()
-# if you want to use white noise instead uncomment the below line:
-# input_img = torch.randn(content_img.data.size(), device=device)
+# if you want to use white noise instead, use the following code:
+#
+# ::
+#
+#    input_img = torch.randn(content_img.data.size(), device=device)

 # add the original input image to the figure:
 plt.figure()
@@ -385,7 +388,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 #
 # As Leon Gatys, the author of the algorithm, suggested `here `__, we will use
 # L-BFGS algorithm to run our gradient descent. Unlike training a network,
-# we want to train the input image in order to minimise the content/style
+# we want to train the input image in order to minimize the content/style
 # losses. We will create a PyTorch L-BFGS optimizer ``optim.LBFGS`` and pass
 # our image to it as the tensor to optimize.
 #
@@ -400,7 +403,7 @@ def get_input_optimizer(input_img):
 # Finally, we must define a function that performs the neural transfer. For
 # each iteration of the networks, it is fed an updated input and computes
 # new losses. We will run the ``backward`` methods of each loss module to
-# dynamicaly compute their gradients. The optimizer requires a “closure”
+# dynamically compute their gradients. The optimizer requires a “closure”
 # function, which reevaluates the module and returns the loss.
 #
 # We still have one final constraint to address. The network may try to
diff --git a/advanced_source/numpy_extensions_tutorial.py b/advanced_source/numpy_extensions_tutorial.py
index afc9a118c30..8ccd92d3765 100644
--- a/advanced_source/numpy_extensions_tutorial.py
+++ b/advanced_source/numpy_extensions_tutorial.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Creating Extensions Using numpy and scipy
+Creating Extensions Using NumPy and SciPy
 =========================================

 **Author**: `Adam Paszke `_
@@ -27,7 +27,7 @@
 # This layer doesn’t particularly do anything useful or mathematically
 # correct.
 #
-# It is aptly named BadFFTFunction
+# It is aptly named ``BadFFTFunction``
 #
 # **Layer Implementation**
@@ -48,7 +48,7 @@ def backward(ctx, grad_output):
         return grad_output.new(result)

 # since this layer does not have any parameters, we can
-# simply declare this as a function, rather than as an nn.Module class
+# simply declare this as a function, rather than as an ``nn.Module`` class


 def incorrect_fft(input):
@@ -75,7 +75,7 @@ def incorrect_fft(input):
 # Implementation of a layer with learnable weights, where cross-correlation
 # has a filter (kernel) that represents weights.
 #
-# The backward pass computes the gradient wrt the input and the gradient wrt the filter.
+# The backward pass computes the gradient ``wrt`` the input and the gradient ``wrt`` the filter.

 from numpy import flip
 import numpy as np
diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py
index 91dfc806398..eb184e85109 100644
--- a/advanced_source/super_resolution_with_onnxruntime.py
+++ b/advanced_source/super_resolution_with_onnxruntime.py
@@ -37,12 +37,12 @@
 # and is widely used in image processing or video editing. For this
 # tutorial, we will use a small super-resolution model.
 #
-# First, let's create a SuperResolution model in PyTorch.
+# First, let's create a ``SuperResolution`` model in PyTorch.
 # This model uses the efficient sub-pixel convolution layer described in
 # `"Real-Time Single Image and Video Super-Resolution Using an Efficient
 # Sub-Pixel Convolutional Neural Network" - Shi et al `__
 # for increasing the resolution of an image by an upscale factor.
-# The model expects the Y component of the YCbCr of an image as an input, and
+# The model expects the Y component of the ``YCbCr`` of an image as an input, and
 # outputs the upscaled Y component in super resolution.
 #
 # `The
@@ -87,7 +87,7 @@ def _initialize_weights(self):

 ######################################################################
 # Ordinarily, you would now train this model; however, for this tutorial,
-# we will instead download some pre-trained weights. Note that this model
+# we will instead download some pretrained weights. Note that this model
 # was not trained fully for good accuracy and is used here for
 # demonstration purposes only.
 #
@@ -154,9 +154,9 @@ def _initialize_weights(self):
 # the same values when run in ONNX Runtime.
 #
 # But before verifying the model's output with ONNX Runtime, we will check
-# the ONNX model with ONNX's API.
+# the ONNX model with the ONNX API.
 # First, ``onnx.load("super_resolution.onnx")`` will load the saved model and
-# will output a onnx.ModelProto structure (a top-level file/container format for bundling a ML model.
+# will output an ``onnx.ModelProto`` structure (a top-level file/container format for bundling an ML model.
 # For more information `onnx.proto documentation `__.).
 # Then, ``onnx.checker.check_model(onnx_model)`` will verify the model's structure
 # and confirm that the model has a valid schema.
@@ -181,7 +181,7 @@ def _initialize_weights(self):
 # In order to run the model with ONNX Runtime, we need to create an
 # inference session for the model with the chosen configuration
 # parameters (here we use the default config).
-# Once the session is created, we evaluate the model using the run() api.
+# Once the session is created, we evaluate the model using the run() API.
 # The output of this call is a list containing the outputs of the model
 # computed by ONNX Runtime.
 #
@@ -205,7 +205,7 @@ def to_numpy(tensor):

 ######################################################################
 # We should see that the output of PyTorch and ONNX Runtime runs match
-# numerically with the given precision (rtol=1e-03 and atol=1e-05).
+# numerically with the given precision (``rtol=1e-03`` and ``atol=1e-05``).
 # As a side-note, if they do not match then there is an issue in the
 # ONNX exporter, so please contact us in that case.
 #
@@ -230,13 +230,13 @@ def to_numpy(tensor):
 #

 ######################################################################
-# First, let's load the image, pre-process it using standard PIL
+# First, let's load the image, preprocess it using standard PIL
 # python library. Note that this preprocessing is the standard practice of
 # processing data for training/testing neural networks.
 #
 # We first resize the image to fit the size of the model's input (224x224).
 # Then we split the image into its Y, Cb, and Cr components.
-# These components represent a greyscale image (Y), and
+# These components represent a grayscale image (Y), and
 # the blue-difference (Cb) and red-difference (Cr) chroma components.
 # The Y component being more sensitive to the human eye, we are
 # interested in this component which we will be transforming.
@@ -262,7 +262,7 @@ def to_numpy(tensor):

 ######################################################################
 # Now, as a next step, let's take the tensor representing the
-# greyscale resized cat image and run the super-resolution model in
+# grayscale resized cat image and run the super-resolution model in
 # ONNX Runtime as explained previously.
 #
diff --git a/en-wordlist.txt b/en-wordlist.txt
index 025098fd7ee..0b7a5417953 100644
--- a/en-wordlist.txt
+++ b/en-wordlist.txt
@@ -3,10 +3,12 @@ ATen
 Args
 Autograd
 BCE
+BFGS
 BN
 BOS
 Bahdanau
 BatchNorm
+Bethge
 CHW
 CIFAR
 CLS
@@ -14,6 +16,7 @@ CNNDM
 CNNs
 CPUs
 CUDA
+Caffe
 CartPole
 Cayley
 Chatbots
@@ -33,6 +36,7 @@ DeiT
 DenseNet
 EOS
 EPS
+Ecker
 FC
 FGSM
 FLAVA
@@ -45,11 +49,13 @@ GAE
 GAN
 GANs
 GLOO
+GPT
 GPU's
 GPUs
 GRU
 GRUs
 GTC
+Gatys
 GeForce
 Goodfellow
 Goodfellow’s
@@ -93,7 +99,9 @@ NeurIPS
 NumPy
 Numericalization
 Numpy's
+ONNX
 OpenAI
+PIL
 PPO
 Plotly
 Prec
@@ -108,11 +116,13 @@ RTX
 Radford
 ReLU
 ResNet
+Runtime's
 SDPA
 SGD
 SPD
 SST2
 STN
+SciPy
 Sequentials
 Sigmoid
 SoTA
@@ -130,6 +140,7 @@ TorchX
 Tunable
 UI
 Unescape
+VGG
 VQA
 VS Code
 Wikitext
@@ -163,6 +174,7 @@ cardinality
 chatbot
 chatbot's
 checkpointing
+chroma
 colorbar
 compilable
 composable
@@ -219,6 +231,7 @@ hvp
 hyperparameter
 hyperparameters
 imagenet
+inferencing
 initializations
 inlined
 interpretable
@@ -333,6 +346,7 @@ timesteps
 tokenization
 tokenize
 tokenizer
+tokenizes
 tooltip
 topologies
 torchaudio
@@ -355,6 +369,7 @@ unparametrized
 unpickling
 unpruned
 updation
+upscaled
 utils
 vectorization
 vectorize