From d5e6f41dcc8dee25c555c89fadb8dece9fd87c4b Mon Sep 17 00:00:00 2001 From: Alanna Burke Date: Wed, 30 Oct 2024 16:13:34 -0400 Subject: [PATCH 1/7] Redirecting autograd_tutorial_old. --- .../former_torchies/autograd_tutorial_old.py | 130 ------------------ .../former_torchies/autograd_tutorial_old.rst | 8 ++ 2 files changed, 8 insertions(+), 130 deletions(-) delete mode 100644 beginner_source/former_torchies/autograd_tutorial_old.py create mode 100644 beginner_source/former_torchies/autograd_tutorial_old.rst diff --git a/beginner_source/former_torchies/autograd_tutorial_old.py b/beginner_source/former_torchies/autograd_tutorial_old.py deleted file mode 100644 index 4030831b8ef..00000000000 --- a/beginner_source/former_torchies/autograd_tutorial_old.py +++ /dev/null @@ -1,130 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Autograd -======== - -Autograd is now a core torch package for automatic differentiation. -It uses a tape based system for automatic differentiation. - -In the forward phase, the autograd tape will remember all the operations -it executed, and in the backward phase, it will replay the operations. - -Tensors that track history --------------------------- - -In autograd, if any input ``Tensor`` of an operation has ``requires_grad=True``, -the computation will be tracked. After computing the backward pass, a gradient -w.r.t. this tensor is accumulated into ``.grad`` attribute. - -There’s one more class which is very important for autograd -implementation - a ``Function``. ``Tensor`` and ``Function`` are -interconnected and build up an acyclic graph, that encodes a complete -history of computation. Each variable has a ``.grad_fn`` attribute that -references a function that has created a function (except for Tensors -created by the user - these have ``None`` as ``.grad_fn``). - -If you want to compute the derivatives, you can call ``.backward()`` on -a ``Tensor``. If ``Tensor`` is a scalar (i.e. it holds a one element -tensor), you don’t need to specify any arguments to ``backward()``, -however if it has more elements, you need to specify a ``grad_output`` -argument that is a tensor of matching shape. -""" - -import torch - -############################################################### -# Create a tensor and set requires_grad=True to track computation with it -x = torch.ones(2, 2, requires_grad=True) -print(x) - -############################################################### -# -print(x.data) - -############################################################### -# -print(x.grad) - -############################################################### -# - -print(x.grad_fn) # we've created x ourselves - -############################################################### -# Do an operation of x: - -y = x + 2 -print(y) - -############################################################### -# y was created as a result of an operation, -# so it has a grad_fn -print(y.grad_fn) - -############################################################### -# More operations on y: - -z = y * y * 3 -out = z.mean() - -print(z, out) - -################################################################ -# ``.requires_grad_( ... )`` changes an existing Tensor's ``requires_grad`` -# flag in-place. The input flag defaults to ``True`` if not given. 
-a = torch.randn(2, 2) -a = ((a * 3) / (a - 1)) -print(a.requires_grad) -a.requires_grad_(True) -print(a.requires_grad) -b = (a * a).sum() -print(b.grad_fn) - -############################################################### -# Gradients -# --------- -# -# let's backprop now and print gradients d(out)/dx - -out.backward() -print(x.grad) - - -############################################################### -# By default, gradient computation flushes all the internal buffers -# contained in the graph, so if you even want to do the backward on some -# part of the graph twice, you need to pass in ``retain_variables = True`` -# during the first pass. - -x = torch.ones(2, 2, requires_grad=True) -y = x + 2 -y.backward(torch.ones(2, 2), retain_graph=True) -# the retain_variables flag will prevent the internal buffers from being freed -print(x.grad) - -############################################################### -# -z = y * y -print(z) - -############################################################### -# -# just backprop random gradients - -gradient = torch.randn(2, 2) - -# this would fail if we didn't specify -# that we want to retain variables -y.backward(gradient) - -print(x.grad) - -############################################################### -# You can also stop autograd from tracking history on Tensors -# with requires_grad=True by wrapping the code block in -# ``with torch.no_grad():`` -print(x.requires_grad) -print((x ** 2).requires_grad) - -with torch.no_grad(): - print((x ** 2).requires_grad) diff --git a/beginner_source/former_torchies/autograd_tutorial_old.rst b/beginner_source/former_torchies/autograd_tutorial_old.rst new file mode 100644 index 00000000000..8c887e00c8a --- /dev/null +++ b/beginner_source/former_torchies/autograd_tutorial_old.rst @@ -0,0 +1,8 @@ +Autograd +============== + +This tutorial is out of date. You'll be redirected to the new tutorial in 3 seconds: https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html + +.. raw:: html + + From db1ef5411c765880650179ae0220cc271ff720c7 Mon Sep 17 00:00:00 2001 From: Alanna Burke Date: Wed, 30 Oct 2024 16:23:07 -0400 Subject: [PATCH 2/7] Redirecting parallelism_tutorial. --- .../former_torchies/parallelism_tutorial.py | 145 ------------------ .../former_torchies/parallelism_tutorial.rst | 8 + 2 files changed, 8 insertions(+), 145 deletions(-) delete mode 100644 beginner_source/former_torchies/parallelism_tutorial.py create mode 100644 beginner_source/former_torchies/parallelism_tutorial.rst diff --git a/beginner_source/former_torchies/parallelism_tutorial.py b/beginner_source/former_torchies/parallelism_tutorial.py deleted file mode 100644 index a11d844e1bd..00000000000 --- a/beginner_source/former_torchies/parallelism_tutorial.py +++ /dev/null @@ -1,145 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Multi-GPU Examples -================== - -Data Parallelism is when we split the mini-batch of samples into -multiple smaller mini-batches and run the computation for each of the -smaller mini-batches in parallel. - -Data Parallelism is implemented using ``torch.nn.DataParallel``. -One can wrap a Module in ``DataParallel`` and it will be parallelized -over multiple GPUs in the batch dimension. 
- - -DataParallel -------------- -""" -import torch -import torch.nn as nn - - -class DataParallelModel(nn.Module): - - def __init__(self): - super().__init__() - self.block1 = nn.Linear(10, 20) - - # wrap block2 in DataParallel - self.block2 = nn.Linear(20, 20) - self.block2 = nn.DataParallel(self.block2) - - self.block3 = nn.Linear(20, 20) - - def forward(self, x): - x = self.block1(x) - x = self.block2(x) - x = self.block3(x) - return x - -######################################################################## -# The code does not need to be changed in CPU-mode. -# -# The documentation for DataParallel can be found -# `here `_. -# -# **Attributes of the wrapped module** -# -# After wrapping a Module with ``DataParallel``, the attributes of the module -# (e.g. custom methods) became inaccessible. This is because ``DataParallel`` -# defines a few new members, and allowing other attributes might lead to -# clashes in their names. For those who still want to access the attributes, -# a workaround is to use a subclass of ``DataParallel`` as below. - -class MyDataParallel(nn.DataParallel): - def __getattr__(self, name): - try: - return super().__getattr__(name) - except AttributeError: - return getattr(self.module, name) - -######################################################################## -# **Primitives on which DataParallel is implemented upon:** -# -# -# In general, pytorch’s `nn.parallel` primitives can be used independently. -# We have implemented simple MPI-like primitives: -# -# - replicate: replicate a Module on multiple devices -# - scatter: distribute the input in the first-dimension -# - gather: gather and concatenate the input in the first-dimension -# - parallel\_apply: apply a set of already-distributed inputs to a set of -# already-distributed models. -# -# To give a better clarity, here function ``data_parallel`` composed using -# these collectives - - -def data_parallel(module, input, device_ids, output_device=None): - if not device_ids: - return module(input) - - if output_device is None: - output_device = device_ids[0] - - replicas = nn.parallel.replicate(module, device_ids) - inputs = nn.parallel.scatter(input, device_ids) - replicas = replicas[:len(inputs)] - outputs = nn.parallel.parallel_apply(replicas, inputs) - return nn.parallel.gather(outputs, output_device) - -######################################################################## -# Part of the model on CPU and part on the GPU -# -------------------------------------------- -# -# Let’s look at a small example of implementing a network where part of it -# is on the CPU and part on the GPU - -device = torch.device("cuda:0") - -class DistributedModel(nn.Module): - - def __init__(self): - super().__init__( - embedding=nn.Embedding(1000, 10), - rnn=nn.Linear(10, 10).to(device), - ) - - def forward(self, x): - # Compute embedding on CPU - x = self.embedding(x) - - # Transfer to GPU - x = x.to(device) - - # Compute RNN on GPU - x = self.rnn(x) - return x - -######################################################################## -# -# This was a small introduction to PyTorch for former Torch users. -# There’s a lot more to learn. -# -# Look at our more comprehensive introductory tutorial which introduces -# the ``optim`` package, data loaders etc.: :doc:`/beginner/deep_learning_60min_blitz`. 
-#
-# Also look at
-#
-# - :doc:`Train neural nets to play video games `
-# - `Train a state-of-the-art ResNet network on imagenet`_
-# - `Train a face generator using Generative Adversarial Networks`_
-# - `Train a word-level language model using Recurrent LSTM networks`_
-# - `More examples`_
-# - `More tutorials`_
-# - `Discuss PyTorch on the Forums`_
-# - `Chat with other users on Slack`_
-#
-# .. _`Deep Learning with PyTorch: a 60-minute blitz`: https://github.com/pytorch/tutorials/blob/main/Deep%20Learning%20with%20PyTorch.ipynb
-# .. _Train a state-of-the-art ResNet network on imagenet: https://github.com/pytorch/examples/tree/master/imagenet
-# .. _Train a face generator using Generative Adversarial Networks: https://github.com/pytorch/examples/tree/master/dcgan
-# .. _Train a word-level language model using Recurrent LSTM networks: https://github.com/pytorch/examples/tree/master/word_language_model
-# .. _More examples: https://github.com/pytorch/examples
-# .. _More tutorials: https://github.com/pytorch/tutorials
-# .. _Discuss PyTorch on the Forums: https://discuss.pytorch.org/
-# .. _Chat with other users on Slack: https://pytorch.slack.com/messages/beginner/
diff --git a/beginner_source/former_torchies/parallelism_tutorial.rst b/beginner_source/former_torchies/parallelism_tutorial.rst
new file mode 100644
index 00000000000..04bb1d69e57
--- /dev/null
+++ b/beginner_source/former_torchies/parallelism_tutorial.rst
@@ -0,0 +1,8 @@
+Multi-GPU Examples
+==================
+
+This tutorial is out of date. You'll be redirected to the new tutorial in 3 seconds: https://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html
+
+.. raw:: html
+
+   <meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/beginner/blitz/data_parallel_tutorial.html'" />

From a7440f7be15925cf452e9c8de403c65d1e0a65de Mon Sep 17 00:00:00 2001
From: Alanna Burke
Date: Wed, 30 Oct 2024 16:24:58 -0400
Subject: [PATCH 3/7] Redirecting nnft_tutorial.

---
 .../former_torchies/{nnft_tutorial.py => nnft_tutorial.rst} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename beginner_source/former_torchies/{nnft_tutorial.py => nnft_tutorial.rst} (100%)

diff --git a/beginner_source/former_torchies/nnft_tutorial.py b/beginner_source/former_torchies/nnft_tutorial.rst
similarity index 100%
rename from beginner_source/former_torchies/nnft_tutorial.py
rename to beginner_source/former_torchies/nnft_tutorial.rst

From 43cc28609bfdc3b868e2ef7e79f3da491e9bccf4 Mon Sep 17 00:00:00 2001
From: Alanna Burke
Date: Wed, 30 Oct 2024 16:42:21 -0400
Subject: [PATCH 4/7] Redirecting tensor_tutorial_old.

---
 .../former_torchies/tensor_tutorial_old.py  | 143 ------------------
 .../former_torchies/tensor_tutorial_old.rst |   8 +
 2 files changed, 8 insertions(+), 143 deletions(-)
 delete mode 100644 beginner_source/former_torchies/tensor_tutorial_old.py
 create mode 100644 beginner_source/former_torchies/tensor_tutorial_old.rst

diff --git a/beginner_source/former_torchies/tensor_tutorial_old.py b/beginner_source/former_torchies/tensor_tutorial_old.py
deleted file mode 100644
index 10a9d81fadb..00000000000
--- a/beginner_source/former_torchies/tensor_tutorial_old.py
+++ /dev/null
@@ -1,143 +0,0 @@
-"""
-Tensors
-=======
-
-Tensors behave almost exactly the same way in PyTorch as they do in
-Torch.
- -Create a tensor of size (5 x 7) with uninitialized memory: - -""" - -import torch -a = torch.empty(5, 7, dtype=torch.float) - -############################################################### -# Initialize a double tensor randomized with a normal distribution with mean=0, -# var=1: - -a = torch.randn(5, 7, dtype=torch.double) -print(a) -print(a.size()) - -############################################################### -# .. note:: -# ``torch.Size`` is in fact a tuple, so it supports the same operations -# -# Inplace / Out-of-place -# ---------------------- -# -# The first difference is that ALL operations on the tensor that operate -# in-place on it will have an ``_`` postfix. For example, ``add`` is the -# out-of-place version, and ``add_`` is the in-place version. - -a.fill_(3.5) -# a has now been filled with the value 3.5 - -b = a.add(4.0) -# a is still filled with 3.5 -# new tensor b is returned with values 3.5 + 4.0 = 7.5 - -print(a, b) - -############################################################### -# Some operations like ``narrow`` do not have in-place versions, and -# hence, ``.narrow_`` does not exist. Similarly, some operations like -# ``fill_`` do not have an out-of-place version, so ``.fill`` does not -# exist. -# -# Zero Indexing -# ------------- -# -# Another difference is that Tensors are zero-indexed. (In lua, tensors are -# one-indexed) - -b = a[0, 3] # select 1st row, 4th column from a - -############################################################### -# Tensors can be also indexed with Python's slicing - -b = a[:, 3:5] # selects all rows, 4th column and 5th column from a - -############################################################### -# No camel casing -# --------------- -# -# The next small difference is that all functions are now NOT camelCase -# anymore. For example ``indexAdd`` is now called ``index_add_`` - - -x = torch.ones(5, 5) -print(x) - -############################################################### -# - -z = torch.empty(5, 2) -z[:, 0] = 10 -z[:, 1] = 100 -print(z) - -############################################################### -# -x.index_add_(1, torch.tensor([4, 0], dtype=torch.long), z) -print(x) - -############################################################### -# Numpy Bridge -# ------------ -# -# Converting a torch Tensor to a numpy array and vice versa is a breeze. -# The torch Tensor and numpy array will share their underlying memory -# locations, and changing one will change the other. -# -# Converting torch Tensor to numpy Array -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -a = torch.ones(5) -print(a) - -############################################################### -# - -b = a.numpy() -print(b) - -############################################################### -# -a.add_(1) -print(a) -print(b) # see how the numpy array changed in value - - -############################################################### -# Converting numpy Array to torch Tensor -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -import numpy as np -a = np.ones(5) -b = torch.from_numpy(a) -np.add(a, 1, out=a) -print(a) -print(b) # see how changing the np array changed the torch Tensor automatically - -############################################################### -# All the Tensors on the CPU except a CharTensor support converting to -# NumPy and back. -# -# CUDA Tensors -# ------------ -# -# CUDA Tensors are nice and easy in pytorch, and transfering a CUDA tensor -# from the CPU to GPU will retain its underlying type. 
-
-# let us run this cell only if CUDA is available
-if torch.cuda.is_available():
-
-    # creates a LongTensor and transfers it
-    # to GPU as torch.cuda.LongTensor
-    a = torch.full((10,), 3, device=torch.device("cuda"))
-    print(type(a))
-    b = a.to(torch.device("cpu"))
-    # transfers it to CPU, back to
-    # being a torch.LongTensor
diff --git a/beginner_source/former_torchies/tensor_tutorial_old.rst b/beginner_source/former_torchies/tensor_tutorial_old.rst
new file mode 100644
index 00000000000..939a6855c27
--- /dev/null
+++ b/beginner_source/former_torchies/tensor_tutorial_old.rst
@@ -0,0 +1,8 @@
+Tensors
+==============
+
+This tutorial is out of date. You'll be redirected to the new tutorial in 3 seconds: https://pytorch.org/tutorials/beginner/basics/tensorqs_tutorial.html
+
+.. raw:: html
+
+   <meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/beginner/basics/tensorqs_tutorial.html'" />

From 17108d5bdd1a5ad5ecdac0f0727c69affef8b012 Mon Sep 17 00:00:00 2001
From: Alanna Burke
Date: Wed, 30 Oct 2024 17:03:13 -0400
Subject: [PATCH 5/7] Redirecting former_torchies_tutorial.

---
 beginner_source/former_torchies_tutorial.rst | 37 +++-----------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/beginner_source/former_torchies_tutorial.rst b/beginner_source/former_torchies_tutorial.rst
index e6ae59b7082..79aac42f3b8 100644
--- a/beginner_source/former_torchies_tutorial.rst
+++ b/beginner_source/former_torchies_tutorial.rst
@@ -1,37 +1,8 @@
 PyTorch for Former Torch Users
-------------------------------
-**Author**: `Soumith Chintala `_
-
-In this tutorial, you will learn the following:
-
-1. Using torch Tensors, and important difference against (Lua)Torch
-2. Using the autograd package
-3. Building neural networks
-
-   - Building a ConvNet
-   - Building a Recurrent Net
-
-4. Use multiple GPUs
-
-
-.. toctree::
-   :hidden:
-
-   /beginner/former_torchies/tensor_tutorial_old
-   /beginner/former_torchies/autograd_tutorial_old
-   /beginner/former_torchies/nnft_tutorial
-   /beginner/former_torchies/parallelism_tutorial
-
-.. galleryitem:: /beginner/former_torchies/tensor_tutorial_old.py
-    :figure: /_static/img/tensor_illustration_flat.png
-
-.. galleryitem:: /beginner/former_torchies/autograd_tutorial_old.py
-
-.. galleryitem:: /beginner/former_torchies/nnft_tutorial.py
-    :figure: /_static/img/torch-nn-vs-pytorch-nn.png
-
-.. galleryitem:: /beginner/former_torchies/parallelism_tutorial.py
+==============================
+This tutorial is out of date. Please check out the PyTorch tutorials here: https://pytorch.org/tutorials/
+You will be redirected in 3 seconds.
 
 .. raw:: html
 
-
+ From 0428b86e339e30cae6f13ace76c34a7e65d049e1 Mon Sep 17 00:00:00 2001 From: Alanna Burke Date: Wed, 30 Oct 2024 17:04:01 -0400 Subject: [PATCH 6/7] Removing README as .py files have been changed to .rst. --- beginner_source/former_torchies/README.txt | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 beginner_source/former_torchies/README.txt diff --git a/beginner_source/former_torchies/README.txt b/beginner_source/former_torchies/README.txt deleted file mode 100644 index 5bb8c93f00c..00000000000 --- a/beginner_source/former_torchies/README.txt +++ /dev/null @@ -1,18 +0,0 @@ - PyTorch for former Torch users - ------------------------------ - -1. tensor_tutorial_old.py - Tensors - https://pytorch.org/tutorials/beginner/former_torchies/tensor_tutorial_old.html - -2. autograd_tutorial_old.py - Autograd - https://pytorch.org/tutorials/beginner/former_torchies/autograd_tutorial_old.html - -3. nnft_tutorial.py - nn package - https://pytorch.org/tutorials/beginner/former_torchies/nnft_tutorial.html - -4. parallelism_tutorial.py - Multi-GPU examples - https://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html From 2880d6bc4f94169e0491227546c2f70810d33acb Mon Sep 17 00:00:00 2001 From: Alanna Burke Date: Wed, 30 Oct 2024 18:05:08 -0400 Subject: [PATCH 7/7] Redirecting nnft_tutorial. --- .../former_torchies/nnft_tutorial.rst | 268 +----------------- 1 file changed, 5 insertions(+), 263 deletions(-) diff --git a/beginner_source/former_torchies/nnft_tutorial.rst b/beginner_source/former_torchies/nnft_tutorial.rst index 316bf03a985..db378a7162b 100644 --- a/beginner_source/former_torchies/nnft_tutorial.rst +++ b/beginner_source/former_torchies/nnft_tutorial.rst @@ -1,266 +1,8 @@ -# -*- coding: utf-8 -*- -""" -nn package -========== +nn Package +=============== -We’ve redesigned the nn package, so that it’s fully integrated with -autograd. Let's review the changes. +This tutorial is out of date. You'll be redirected to the new tutorial in 3 seconds: https://pytorch.org/tutorials/beginner/nn_tutorial.html -**Replace containers with autograd:** +.. raw:: html - You no longer have to use Containers like ``ConcatTable``, or modules like - ``CAddTable``, or use and debug with nngraph. We will seamlessly use - autograd to define our neural networks. For example, - - * ``output = nn.CAddTable():forward({input1, input2})`` simply becomes - ``output = input1 + input2`` - * ``output = nn.MulConstant(0.5):forward(input)`` simply becomes - ``output = input * 0.5`` - -**State is no longer held in the module, but in the network graph:** - - Using recurrent networks should be simpler because of this reason. If - you want to create a recurrent network, simply use the same Linear layer - multiple times, without having to think about sharing weights. - - .. figure:: /_static/img/torch-nn-vs-pytorch-nn.png - :alt: torch-nn-vs-pytorch-nn - - torch-nn-vs-pytorch-nn - -**Simplified debugging:** - - Debugging is intuitive using Python’s pdb debugger, and **the debugger - and stack traces stop at exactly where an error occurred.** What you see - is what you get. - -Example 1: ConvNet ------------------- - -Let’s see how to create a small ConvNet. - -All of your networks are derived from the base class ``nn.Module``: - -- In the constructor, you declare all the layers you want to use. 
-- In the forward function, you define how your model is going to be - run, from input to output -""" - -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class MNISTConvNet(nn.Module): - - def __init__(self): - # this is the place where you instantiate all your modules - # you can later access them using the same names you've given them in - # here - super(MNISTConvNet, self).__init__() - self.conv1 = nn.Conv2d(1, 10, 5) - self.pool1 = nn.MaxPool2d(2, 2) - self.conv2 = nn.Conv2d(10, 20, 5) - self.pool2 = nn.MaxPool2d(2, 2) - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - # it's the forward function that defines the network structure - # we're accepting only a single input in here, but if you want, - # feel free to use more - def forward(self, input): - x = self.pool1(F.relu(self.conv1(input))) - x = self.pool2(F.relu(self.conv2(x))) - - # in your model definition you can go full crazy and use arbitrary - # python code to define your model structure - # all these are perfectly legal, and will be handled correctly - # by autograd: - # if x.gt(0) > x.numel() / 2: - # ... - # - # you can even do a loop and reuse the same module inside it - # modules no longer hold ephemeral state, so you can use them - # multiple times during your forward pass - # while x.norm(2) < 10: - # x = self.conv1(x) - - x = x.view(x.size(0), -1) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - return x - -############################################################### -# Let's use the defined ConvNet now. -# You create an instance of the class first. - - -net = MNISTConvNet() -print(net) - -######################################################################## -# .. note:: -# -# ``torch.nn`` only supports mini-batches The entire ``torch.nn`` -# package only supports inputs that are a mini-batch of samples, and not -# a single sample. -# -# For example, ``nn.Conv2d`` will take in a 4D Tensor of -# ``nSamples x nChannels x Height x Width``. -# -# If you have a single sample, just use ``input.unsqueeze(0)`` to add -# a fake batch dimension. -# -# Create a mini-batch containing a single sample of random data and send the -# sample through the ConvNet. - -input = torch.randn(1, 1, 28, 28) -out = net(input) -print(out.size()) - -######################################################################## -# Define a dummy target label and compute error using a loss function. - -target = torch.tensor([3], dtype=torch.long) -loss_fn = nn.CrossEntropyLoss() # LogSoftmax + ClassNLL Loss -err = loss_fn(out, target) -err.backward() - -print(err) - -######################################################################## -# The output of the ConvNet ``out`` is a ``Tensor``. We compute the loss -# using that, and that results in ``err`` which is also a ``Tensor``. -# Calling ``.backward`` on ``err`` hence will propagate gradients all the -# way through the ConvNet to it’s weights -# -# Let's access individual layer weights and gradients: - -print(net.conv1.weight.grad.size()) - -######################################################################## -print(net.conv1.weight.data.norm()) # norm of the weight -print(net.conv1.weight.grad.data.norm()) # norm of the gradients - -######################################################################## -# Forward and Backward Function Hooks -# ----------------------------------- -# -# We’ve inspected the weights and the gradients. But how about inspecting -# / modifying the output and grad\_output of a layer? 
-# -# We introduce **hooks** for this purpose. -# -# You can register a function on a ``Module`` or a ``Tensor``. -# The hook can be a forward hook or a backward hook. -# The forward hook will be executed when a forward call is executed. -# The backward hook will be executed in the backward phase. -# Let’s look at an example. -# -# We register a forward hook on conv2 and print some information - - -def printnorm(self, input, output): - # input is a tuple of packed inputs - # output is a Tensor. output.data is the Tensor we are interested - print('Inside ' + self.__class__.__name__ + ' forward') - print('') - print('input: ', type(input)) - print('input[0]: ', type(input[0])) - print('output: ', type(output)) - print('') - print('input size:', input[0].size()) - print('output size:', output.data.size()) - print('output norm:', output.data.norm()) - - -net.conv2.register_forward_hook(printnorm) - -out = net(input) - -######################################################################## -# -# We register a backward hook on conv2 and print some information - - -def printgradnorm(self, grad_input, grad_output): - print('Inside ' + self.__class__.__name__ + ' backward') - print('Inside class:' + self.__class__.__name__) - print('') - print('grad_input: ', type(grad_input)) - print('grad_input[0]: ', type(grad_input[0])) - print('grad_output: ', type(grad_output)) - print('grad_output[0]: ', type(grad_output[0])) - print('') - print('grad_input size:', grad_input[0].size()) - print('grad_output size:', grad_output[0].size()) - print('grad_input norm:', grad_input[0].norm()) - - -net.conv2.register_backward_hook(printgradnorm) - -out = net(input) -err = loss_fn(out, target) -err.backward() - -######################################################################## -# A full and working MNIST example is located here -# https://github.com/pytorch/examples/tree/master/mnist -# -# Example 2: Recurrent Net -# ------------------------ -# -# Next, let’s look at building recurrent nets with PyTorch. -# -# Since the state of the network is held in the graph and not in the -# layers, you can simply create an nn.Linear and reuse it over and over -# again for the recurrence. - - -class RNN(nn.Module): - - # you can also accept arguments in your model constructor - def __init__(self, data_size, hidden_size, output_size): - super(RNN, self).__init__() - - self.hidden_size = hidden_size - input_size = data_size + hidden_size - - self.i2h = nn.Linear(input_size, hidden_size) - self.h2o = nn.Linear(hidden_size, output_size) - - def forward(self, data, last_hidden): - input = torch.cat((data, last_hidden), 1) - hidden = self.i2h(input) - output = self.h2o(hidden) - return hidden, output - - -rnn = RNN(50, 20, 10) - -######################################################################## -# -# A more complete Language Modeling example using LSTMs and Penn Tree-bank -# is located -# `here `_ -# -# PyTorch by default has seamless CuDNN integration for ConvNets and -# Recurrent Nets - -loss_fn = nn.MSELoss() - -batch_size = 10 -TIMESTEPS = 5 - -# Create some fake data -batch = torch.randn(batch_size, 50) -hidden = torch.zeros(batch_size, 20) -target = torch.zeros(batch_size, 10) - -loss = 0 -for t in range(TIMESTEPS): - # yes! you can reuse the same network several times, - # sum up the losses, and call backward! - hidden, output = rnn(batch, hidden) - loss += loss_fn(output, target) -loss.backward() +
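+   <meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/beginner/nn_tutorial.html'" />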