From d5860249da0f4f91c971b2774c16043d916a313a Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Apr 2023 09:18:29 -0700 Subject: [PATCH 1/3] Pyspelling: Advanced Python tutorials --- .pyspelling.yml | 10 ++++- advanced_source/ddp_pipeline.py | 37 ++++++++++--------- .../dynamic_quantization_tutorial.py | 10 ++--- advanced_source/neural_style_tutorial.py | 37 ++++++++++--------- advanced_source/numpy_extensions_tutorial.py | 8 ++-- .../super_resolution_with_onnxruntime.py | 20 +++++----- en-wordlist.txt | 15 ++++++++ 7 files changed, 81 insertions(+), 56 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index ffe9f469d03..017adc44c68 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -2,8 +2,14 @@ spellchecker: aspell matrix: - name: python sources: - - beginner_source/*.py - - intermediate_source/*.py + #- beginner_source/*.py + #- intermediate_source/*.py + - advanced_source/*.py + #- advanced_source/ddp_pipeline.py + #- advanced_source/dynamic_quantization_tutorial.py + #- advanced_source/neural_style_tutorial.py + #- advanced_source/numpy_extensions_tutorial.py + #- advanced_source/super_resolution_with_onnxruntime.py dictionary: wordlists: - en-wordlist.txt diff --git a/advanced_source/ddp_pipeline.py b/advanced_source/ddp_pipeline.py index 67040532194..1eb956a7836 100644 --- a/advanced_source/ddp_pipeline.py +++ b/advanced_source/ddp_pipeline.py @@ -75,7 +75,7 @@ def forward(self, x): # As a result, our focus is on ``nn.TransformerEncoder`` and we split the model # such that half of the ``nn.TransformerEncoderLayer`` are on one GPU and the # other half are on another. To do this, we pull out the ``Encoder`` and -# ``Decoder`` sections into seperate modules and then build an nn.Sequential +# ``Decoder`` sections into separate modules and then build an ``nn.Sequential`` # representing the original Transformer module. @@ -151,16 +151,17 @@ def run_worker(rank, world_size): # length 6: # # .. math:: -# \begin{bmatrix} -# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} -# \end{bmatrix} -# \Rightarrow -# \begin{bmatrix} -# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & -# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & -# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & -# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} -# \end{bmatrix} +# +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} # # These columns are treated as independent by the model, which means that # the dependence of ``G`` and ``F`` can not be learned, but allows more @@ -192,11 +193,11 @@ def data_process(raw_text_iter): device = torch.device(2 * rank) def batchify(data, bsz, rank, world_size, is_train=False): - # Divide the dataset into bsz parts. + # Divide the dataset into ``bsz`` parts. 
nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz`` batches. data = data.view(bsz, -1).t().contiguous() # Divide the data across the ranks only for training data. if is_train: @@ -261,14 +262,14 @@ def get_batch(source, i): # # The pipeline is then initialized with 8 transformer layers on one GPU and 8 # transformer layers on the other GPU. One pipe is setup across GPUs 0 and 1 and -# another across GPUs 2 and 3. Both pipes are then replicated using DistributedDataParallel. +# another across GPUs 2 and 3. Both pipes are then replicated using ``DistributedDataParallel``. # In 'run_worker' ntokens = len(vocab) # the size of vocabulary emsize = 4096 # embedding dimension - nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder - nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder - nhead = 16 # the number of heads in the multiheadattention models + nhid = 4096 # the dimension of the feedforward network model in ``nn.TransformerEncoder`` + nlayers = 8 # the number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder`` + nhead = 16 # the number of heads in the Multihead Attention models dropout = 0.2 # the dropout value from torch.distributed import rpc @@ -287,7 +288,7 @@ def get_batch(source, i): ) ) - # Num gpus for model parallelism. + # Number of GPUs for model parallelism. num_gpus = 2 partition_len = ((nlayers - 1) // num_gpus) + 1 diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index 571c0e4a831..9cc07a1d956 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -130,12 +130,12 @@ def tokenize(self, path): corpus = Corpus(model_data_filepath + 'wikitext-2') ###################################################################### -# 3. Load the pre-trained model +# 3. Load the pretrained model # ----------------------------- # # This is a tutorial on dynamic quantization, a quantization technique # that is applied after a model has been trained. Therefore, we'll simply load some -# pre-trained weights into this model architecture; these weights were obtained +# pretrained weights into this model architecture; these weights were obtained # by training for five epochs using the default settings in the word language model # example. @@ -159,7 +159,7 @@ def tokenize(self, path): print(model) ###################################################################### -# Now let's generate some text to ensure that the pre-trained model is working +# Now let's generate some text to ensure that the pretrained model is working # properly - similarly to before, we follow # `here `_ @@ -200,11 +200,11 @@ def tokenize(self, path): # create test data set def batchify(data, bsz): - # Work out how cleanly we can divide the dataset into bsz parts. + # Work out how cleanly we can divide the dataset into ``bsz`` parts. nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz`` batches. 
return data.view(bsz, -1).t().contiguous() test_data = batchify(corpus.test, eval_batch_size) diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py index 099cb330859..3d84fc508bc 100644 --- a/advanced_source/neural_style_tutorial.py +++ b/advanced_source/neural_style_tutorial.py @@ -44,7 +44,7 @@ # - ``PIL``, ``PIL.Image``, ``matplotlib.pyplot`` (load and display # images) # - ``torchvision.transforms`` (transform PIL images into tensors) -# - ``torchvision.models`` (train or load pre-trained models) +# - ``torchvision.models`` (train or load pretrained models) # - ``copy`` (to deep copy the models; system package) from __future__ import print_function @@ -84,7 +84,7 @@ # torch library are trained with tensor values ranging from 0 to 1. If you # try to feed the networks with 0 to 255 tensor images, then the activated # feature maps will be unable to sense the intended content and style. -# However, pre-trained networks from the Caffe library are trained with 0 +# However, pretrained networks from the Caffe library are trained with 0 # to 255 tensor images. # # @@ -96,7 +96,7 @@ # with name ``images`` in your current working directory. # desired size of the output image -imsize = 512 if torch.cuda.is_available() else 128 # use small size if no gpu +imsize = 512 if torch.cuda.is_available() else 128 # use small size if no GPU loader = transforms.Compose([ transforms.Resize(imsize), # scale imported image @@ -220,7 +220,7 @@ def gram_matrix(input): # b=number of feature maps # (c,d)=dimensions of a f. map (N=c*d) - features = input.view(a * b, c * d) # resise F_XL into \hat F_XL + features = input.view(a * b, c * d) # resize F_XL into \hat F_XL G = torch.mm(features, features.t()) # compute the gram product @@ -251,7 +251,7 @@ def forward(self, input): # Importing the Model # ------------------- # -# Now we need to import a pre-trained neural network. We will use a 19 +# Now we need to import a pretrained neural network. We will use a 19 # layer VGG network like the one used in the paper. # # PyTorch’s implementation of VGG is a module divided into two child @@ -277,7 +277,7 @@ def forward(self, input): cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device) # create a module to normalize input image so we can easily put it in a -# nn.Sequential +# ``nn.Sequential`` class Normalization(nn.Module): def __init__(self, mean, std): super(Normalization, self).__init__() @@ -288,14 +288,14 @@ def __init__(self, mean, std): self.std = torch.tensor(std).view(-1, 1, 1) def forward(self, img): - # normalize img + # normalize ``img`` return (img - self.mean) / self.std ###################################################################### # A ``Sequential`` module contains an ordered list of child modules. For -# instance, ``vgg19.features`` contains a sequence (Conv2d, ReLU, MaxPool2d, -# Conv2d, ReLU…) aligned in the right order of depth. We need to add our +# instance, ``vgg19.features`` contains a sequence (``Conv2d``, ``ReLU``, ``MaxPool2d``, +# ``Conv2d``, ``ReLU``…) aligned in the right order of depth. We need to add our # content loss and style loss layers immediately after the convolution # layer they are detecting. To do this we must create a new ``Sequential`` # module that has content loss and style loss modules correctly inserted. 
@@ -312,12 +312,12 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 
     # normalization module
     normalization = Normalization(normalization_mean, normalization_std).to(device)
 
-    # just in order to have an iterable access to or list of content/syle
+    # just in order to have iterable access to the list of content/style
     # losses
     content_losses = []
     style_losses = []
 
-    # assuming that cnn is a nn.Sequential, so we make a new nn.Sequential
+    # assuming that ``cnn`` is a ``nn.Sequential``, so we make a new ``nn.Sequential``
     # to put in modules that are supposed to be activated sequentially
     model = nn.Sequential(normalization)
 
@@ -328,8 +328,8 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
             name = 'conv_{}'.format(i)
         elif isinstance(layer, nn.ReLU):
             name = 'relu_{}'.format(i)
-            # The in-place version doesn't play very nicely with the ContentLoss
-            # and StyleLoss we insert below. So we replace with out-of-place
+            # The in-place version doesn't play very nicely with the ``ContentLoss``
+            # and ``StyleLoss`` we insert below. So we replace with out-of-place
             # ones here.
             layer = nn.ReLU(inplace=False)
         elif isinstance(layer, nn.MaxPool2d):
@@ -371,8 +371,11 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
 #
 
 input_img = content_img.clone()
-# if you want to use white noise instead uncomment the below line:
-# input_img = torch.randn(content_img.data.size(), device=device)
+# if you want to use white noise instead, use the following code:
+#
+# ::
+#
+#    input_img = torch.randn(content_img.data.size(), device=device)
 
 # add the original input image to the figure:
 plt.figure()
@@ -385,7 +388,7 @@
 #
 # As Leon Gatys, the author of the algorithm, suggested `here `__, we will use
 # L-BFGS algorithm to run our gradient descent. Unlike training a network,
-# we want to train the input image in order to minimise the content/style
+# we want to train the input image in order to minimize the content/style
 # losses. We will create a PyTorch L-BFGS optimizer ``optim.LBFGS`` and pass
 # our image to it as the tensor to optimize.
 #
@@ -400,7 +403,7 @@ def get_input_optimizer(input_img):
 # Finally, we must define a function that performs the neural transfer. For
 # each iteration of the networks, it is fed an updated input and computes
 # new losses. We will run the ``backward`` methods of each loss module to
-# dynamicaly compute their gradients. The optimizer requires a “closure”
+# dynamically compute their gradients. The optimizer requires a “closure”
 # function, which reevaluates the module and returns the loss.
 #
 # We still have one final constraint to address. The network may try to
diff --git a/advanced_source/numpy_extensions_tutorial.py b/advanced_source/numpy_extensions_tutorial.py
index afc9a118c30..8ccd92d3765 100644
--- a/advanced_source/numpy_extensions_tutorial.py
+++ b/advanced_source/numpy_extensions_tutorial.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Creating Extensions Using numpy and scipy
+Creating Extensions Using NumPy and SciPy
 =========================================
 
 **Author**: `Adam Paszke `_
@@ -27,7 +27,7 @@
 # This layer doesn’t particularly do anything useful or mathematically
 # correct.
 #
-# It is aptly named BadFFTFunction
+# It is aptly named ``BadFFTFunction``
 #
 # **Layer Implementation**
 
@@ -48,7 +48,7 @@ def backward(ctx, grad_output):
         return grad_output.new(result)
 
 # since this layer does not have any parameters, we can
-# simply declare this as a function, rather than as an nn.Module class
+# simply declare this as a function, rather than as an ``nn.Module`` class
 
 
 def incorrect_fft(input):
@@ -75,7 +75,7 @@ def incorrect_fft(input):
 # Implementation of a layer with learnable weights, where cross-correlation
 # has a filter (kernel) that represents weights.
 #
-# The backward pass computes the gradient wrt the input and the gradient wrt the filter.
+# The backward pass computes the gradient with respect to the input and the gradient with respect to the filter.
 
 from numpy import flip
 import numpy as np
diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py
index 91dfc806398..eb184e85109 100644
--- a/advanced_source/super_resolution_with_onnxruntime.py
+++ b/advanced_source/super_resolution_with_onnxruntime.py
@@ -37,12 +37,12 @@
 # and is widely used in image processing or video editing. For this
 # tutorial, we will use a small super-resolution model.
 #
-# First, let's create a SuperResolution model in PyTorch.
+# First, let's create a ``SuperResolution`` model in PyTorch.
 # This model uses the efficient sub-pixel convolution layer described in
 # `"Real-Time Single Image and Video Super-Resolution Using an Efficient
 # Sub-Pixel Convolutional Neural Network" - Shi et al `__
 # for increasing the resolution of an image by an upscale factor.
-# The model expects the Y component of the YCbCr of an image as an input, and
+# The model expects the Y component of the ``YCbCr`` of an image as an input, and
 # outputs the upscaled Y component in super resolution.
 #
 # `The
@@ -87,7 +87,7 @@ def _initialize_weights(self):
 
 ######################################################################
 # Ordinarily, you would now train this model; however, for this tutorial,
-# we will instead download some pre-trained weights. Note that this model
+# we will instead download some pretrained weights. Note that this model
 # was not trained fully for good accuracy and is used here for
 # demonstration purposes only.
 #
@@ -154,9 +154,9 @@ def _initialize_weights(self):
 # the same values when run in ONNX Runtime.
 #
 # But before verifying the model's output with ONNX Runtime, we will check
-# the ONNX model with ONNX's API.
+# the ONNX model with the ONNX API.
 # First, ``onnx.load("super_resolution.onnx")`` will load the saved model and
-# will output a onnx.ModelProto structure (a top-level file/container format for bundling a ML model.
+# will output an ``onnx.ModelProto`` structure (a top-level file/container format for bundling an ML model.
 # For more information `onnx.proto documentation `__.).
 # Then, ``onnx.checker.check_model(onnx_model)`` will verify the model's structure
 # and confirm that the model has a valid schema.
@@ -181,7 +181,7 @@ def _initialize_weights(self):
 # In order to run the model with ONNX Runtime, we need to create an
 # inference session for the model with the chosen configuration
 # parameters (here we use the default config).
-# Once the session is created, we evaluate the model using the run() api.
+# Once the session is created, we evaluate the model using the ``run()`` API.
 # The output of this call is a list containing the outputs of the model
 # computed by ONNX Runtime.
# @@ -205,7 +205,7 @@ def to_numpy(tensor): ###################################################################### # We should see that the output of PyTorch and ONNX Runtime runs match -# numerically with the given precision (rtol=1e-03 and atol=1e-05). +# numerically with the given precision (``rtol=1e-03`` and ``atol=1e-05``). # As a side-note, if they do not match then there is an issue in the # ONNX exporter, so please contact us in that case. # @@ -230,13 +230,13 @@ def to_numpy(tensor): # ###################################################################### -# First, let's load the image, pre-process it using standard PIL +# First, let's load the image, preprocess it using standard PIL # python library. Note that this preprocessing is the standard practice of # processing data for training/testing neural networks. # # We first resize the image to fit the size of the model's input (224x224). # Then we split the image into its Y, Cb, and Cr components. -# These components represent a greyscale image (Y), and +# These components represent a grayscale image (Y), and # the blue-difference (Cb) and red-difference (Cr) chroma components. # The Y component being more sensitive to the human eye, we are # interested in this component which we will be transforming. @@ -262,7 +262,7 @@ def to_numpy(tensor): ###################################################################### # Now, as a next step, let's take the tensor representing the -# greyscale resized cat image and run the super-resolution model in +# grayscale resized cat image and run the super-resolution model in # ONNX Runtime as explained previously. # diff --git a/en-wordlist.txt b/en-wordlist.txt index 025098fd7ee..a15f3f975c0 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -1,3 +1,18 @@ +upscaled +chroma +inferencing +Runtime's +ONNX +SciPy +Bethge +Ecker +Caffe +PIL +VGG +Gatys +BFGS +tokenizes +GPT APIs ATen Args From 7a6c3fb7a5be3749a87cd04315ca8b3e94872bec Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Apr 2023 09:19:33 -0700 Subject: [PATCH 2/3] Update --- .pyspelling.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.pyspelling.yml b/.pyspelling.yml index 017adc44c68..9c9b18800cc 100644 --- a/.pyspelling.yml +++ b/.pyspelling.yml @@ -2,14 +2,9 @@ spellchecker: aspell matrix: - name: python sources: - #- beginner_source/*.py - #- intermediate_source/*.py + - beginner_source/*.py + - intermediate_source/*.py - advanced_source/*.py - #- advanced_source/ddp_pipeline.py - #- advanced_source/dynamic_quantization_tutorial.py - #- advanced_source/neural_style_tutorial.py - #- advanced_source/numpy_extensions_tutorial.py - #- advanced_source/super_resolution_with_onnxruntime.py dictionary: wordlists: - en-wordlist.txt From cde60b76a0386bc971170110d29a109d496c0e3b Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Mon, 24 Apr 2023 09:47:05 -0700 Subject: [PATCH 3/3] Update --- en-wordlist.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/en-wordlist.txt b/en-wordlist.txt index a15f3f975c0..0b7a5417953 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -1,27 +1,14 @@ -upscaled -chroma -inferencing -Runtime's -ONNX -SciPy -Bethge -Ecker -Caffe -PIL -VGG -Gatys -BFGS -tokenizes -GPT APIs ATen Args Autograd BCE +BFGS BN BOS Bahdanau BatchNorm +Bethge CHW CIFAR CLS @@ -29,6 +16,7 @@ CNNDM CNNs CPUs CUDA +Caffe CartPole Cayley Chatbots @@ -48,6 +36,7 @@ DeiT DenseNet EOS EPS +Ecker FC FGSM FLAVA @@ -60,11 +49,13 @@ GAE GAN 
GANs GLOO +GPT GPU's GPUs GRU GRUs GTC +Gatys GeForce Goodfellow Goodfellow’s @@ -108,7 +99,9 @@ NeurIPS NumPy Numericalization Numpy's +ONNX OpenAI +PIL PPO Plotly Prec @@ -123,11 +116,13 @@ RTX Radford ReLU ResNet +Runtime's SDPA SGD SPD SST2 STN +SciPy Sequentials Sigmoid SoTA @@ -145,6 +140,7 @@ TorchX Tunable UI Unescape +VGG VQA VS Code Wikitext @@ -178,6 +174,7 @@ cardinality chatbot chatbot's checkpointing +chroma colorbar compilable composable @@ -234,6 +231,7 @@ hvp hyperparameter hyperparameters imagenet +inferencing initializations inlined interpretable @@ -348,6 +346,7 @@ timesteps tokenization tokenize tokenizer +tokenizes tooltip topologies torchaudio @@ -370,6 +369,7 @@ unparametrized unpickling unpruned updation +upscaled utils vectorization vectorize
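
Note for reviewers: a quick way to exercise the configuration changed above is to run the ``python`` task from ``.pyspelling.yml`` locally. The sketch below is illustrative and not part of the patch; it assumes ``pyspelling`` and the ``aspell`` backend are installed and that it is run from the repository root, where ``.pyspelling.yml`` and ``en-wordlist.txt`` live::

    # Illustrative helper, not part of this patch: run the "python"
    # spellcheck task defined in .pyspelling.yml and report the result.
    # Assumes pyspelling and aspell are installed and the current
    # working directory is the repository root.
    import subprocess
    import sys

    result = subprocess.run(
        ["pyspelling", "-c", ".pyspelling.yml", "-n", "python"],
        capture_output=True,
        text=True,
    )
    print(result.stdout)
    if result.returncode != 0:
        # pyspelling exits non-zero when unknown words remain; terms that
        # are valid but unrecognized belong in en-wordlist.txt, as in the
        # additions made by this patch, rather than being reworded away.
        sys.exit("pyspelling found unknown words -- see output above.")
    print("pyspelling: no issues found.")

Equivalently, ``pyspelling -c .pyspelling.yml -n python`` can be run directly from a shell.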